Skip to content

Commit 1a86d44

Browse files
arsenm and Pravin Jagtap authored
AMDGPU: MC support for v_cvt_scale_fp4<->f32 of gfx950. (#117417)
OPSEL ASM syntax for v_cvt_scalef32_pk_f32_fp4: op_sel:[x,y,z], where x and y (i.e. OPSEL[1:0]) select which source byte to read. OPSEL ASM syntax for v_cvt_scalef32_pk_fp4_f32: op_sel:[a,b,c,d], where c and d (i.e. OPSEL[3:2]) select which destination byte to write. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent d7c20a6 commit 1a86d44

File tree

6 files changed

+222
-8
lines changed

6 files changed

+222
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,11 +396,17 @@ def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts",
396396
"Has bf8 conversion scale instructions"
397397
>;
398398

399+
def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
400+
"HasFP4ConversionScaleInsts",
401+
"true",
402+
"Has fp4 conversion scale instructions"
403+
>;
404+
399405
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
400406
"GFX950Insts",
401407
"true",
402408
"Additional instructions for GFX950+",
403-
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts]
409+
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts]
404410
>;
405411

406412
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
@@ -1545,7 +1551,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15451551
FeatureBF16ConversionInsts,
15461552
FeatureBitOp3Insts,
15471553
FeatureFP8ConversionScaleInsts,
1548-
FeatureBF8ConversionScaleInsts
1554+
FeatureBF8ConversionScaleInsts,
1555+
FeatureFP4ConversionScaleInsts
15491556
])>;
15501557

15511558
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2425,6 +2432,9 @@ def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInst
24252432
def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">,
24262433
AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>;
24272434

2435+
def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
2436+
AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;
2437+
24282438
def HasGDS : Predicate<"Subtarget->hasGDS()">;
24292439

24302440
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class AMDGPUSubtarget {
5252
bool HasTrue16BitInsts = false;
5353
bool HasFP8ConversionScaleInsts = false;
5454
bool HasBF8ConversionScaleInsts = false;
55+
bool HasFP4ConversionScaleInsts = false;
5556
bool EnableRealTrue16Insts = false;
5657
bool HasBF16ConversionInsts = false;
5758
bool HasMadMixInsts = false;
@@ -181,6 +182,8 @@ class AMDGPUSubtarget {
181182

182183
bool hasBF8ConversionScaleInsts() const { return HasBF8ConversionScaleInsts; }
183184

185+
bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; }
186+
184187
bool hasMadMacF32Insts() const {
185188
return HasMadMacF32Insts || !isGCN();
186189
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPPro
887887
let HasOMod = 0;
888888
}
889889

890-
def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
890+
def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
891891
VOP3_OPSEL> {
892892
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
893893
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -899,7 +899,7 @@ def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32,
899899
let HasOMod = 0;
900900
}
901901

902-
def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
902+
def VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
903903
VOP3_OPSEL> {
904904
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
905905
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -928,21 +928,26 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
928928
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
929929
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
930930
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
931-
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
932-
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
931+
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
932+
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
933933
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
934934
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
935935
}
936936

937937
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
938938
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
939939
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
940-
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
941-
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
940+
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
941+
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
942942
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
943943
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
944944
}
945945

946+
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
947+
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
948+
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
949+
}
950+
946951
let SubtargetPredicate = isGFX10Plus in {
947952
let isCommutable = 1, isReMaterializable = 1 in {
948953
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1881,3 +1886,7 @@ defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
18811886
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
18821887
defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
18831888
}
1889+
let OtherPredicates = [HasFP4ConversionScaleInsts] in {
1890+
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
1891+
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
1892+
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,3 +693,99 @@ v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
693693
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
694694
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
695695
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1]
696+
697+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
698+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00]
699+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3
700+
701+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
702+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00]
703+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3
704+
705+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
706+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00]
707+
v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3
708+
709+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
710+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00]
711+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0]
712+
713+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
714+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00]
715+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0]
716+
717+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
718+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00]
719+
v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0]
720+
721+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
722+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00]
723+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0]
724+
725+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
726+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00]
727+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0]
728+
729+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
730+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00]
731+
v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0]
732+
733+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
734+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00]
735+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0]
736+
737+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
738+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00]
739+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0]
740+
741+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
742+
// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00]
743+
v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0]
744+
745+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
746+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04]
747+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3
748+
749+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
750+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44]
751+
v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3|
752+
753+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
754+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02]
755+
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3
756+
757+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
758+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04]
759+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0]
760+
761+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
762+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44]
763+
v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0]
764+
765+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
766+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02]
767+
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0]
768+
769+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
770+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04]
771+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
772+
773+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
774+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44]
775+
v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
776+
777+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
778+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02]
779+
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
780+
781+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
782+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04]
783+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1]
784+
785+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
786+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44]
787+
v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1]
788+
789+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
790+
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
791+
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,27 @@ v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2
7777

7878
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
7979
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2
80+
81+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
82+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp
83+
84+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
85+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 mul:2
86+
87+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
88+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 div:2
89+
90+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
91+
v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp div:2
92+
93+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
94+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp
95+
96+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
97+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 mul:2
98+
99+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
100+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2
101+
102+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
103+
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,75 @@
467467

468468
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
469469
0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00
470+
471+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00]
472+
0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00
473+
474+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00]
475+
0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00
476+
477+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00]
478+
0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00
479+
480+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00]
481+
0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00
482+
483+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00]
484+
0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00
485+
486+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00]
487+
0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00
488+
489+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00]
490+
0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00
491+
492+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00]
493+
0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00
494+
495+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00]
496+
0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00
497+
498+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00]
499+
0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00
500+
501+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00]
502+
0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00
503+
504+
# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00]
505+
0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00
506+
507+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04]
508+
0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04
509+
510+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44]
511+
0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44
512+
513+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02]
514+
0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02
515+
516+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04]
517+
0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04
518+
519+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44]
520+
0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44
521+
522+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02]
523+
0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02
524+
525+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04]
526+
0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04
527+
528+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44]
529+
0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44
530+
531+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02]
532+
0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02
533+
534+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04]
535+
0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04
536+
537+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44]
538+
0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44
539+
540+
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
541+
0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02

0 commit comments

Comments
 (0)