diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a80f571140666..eee0a94f6fc64 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -672,6 +672,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts") +TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 8d227a5f957c8..0312205d4ff8d 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -436,6 +436,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); case AMDGPU::BI__builtin_amdgcn_logf: + case AMDGPU::BI__builtin_amdgcn_log_bf16: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log); case AMDGPU::BI__builtin_amdgcn_exp2f: return emitBuiltinWithOneOverloadedType<1>(*this, E, diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 8b7ec143a2e00..bdf169a1a97da 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -99,6 +99,25 @@ void test_rsq_bf16(global __bf16* out, __bf16 a) *out = __builtin_amdgcn_rsq_bf16(a); } +// CHECK-LABEL: @test_log_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.log.bf16(bfloat [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +void test_log_bf16(global __bf16* out, __bf16 a) +{ + *out = __builtin_amdgcn_log_bf16(a); +} + // CHECK-LABEL: @test_cvt_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 6f8437e82700e..e1bc39302e126 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -532,6 +532,7 @@ defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>; defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>; defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>; defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>; +defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; @@ -1143,6 +1144,7 @@ defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>; defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>; defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>; defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>; +defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/bf16-math.ll b/llvm/test/CodeGen/AMDGPU/bf16-math.ll new file mode 100644 index 0000000000000..05eee2d4d549d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bf16-math.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s + +; TODO: Add global-isel when it can support bf16 + +define amdgpu_ps void @llvm_log2_bf16_v(ptr addrspace(1) %out, bfloat %src) { +; GCN-LABEL: llvm_log2_bf16_v: +; GCN: ; %bb.0: +; GCN-NEXT: v_log_bf16_e32 v2, v2 +; GCN-NEXT: global_store_b16 v[0:1], v2, off +; GCN-NEXT: s_endpgm + %log = call bfloat @llvm.log2.bf16(bfloat %src) + store bfloat %log, ptr addrspace(1) %out, align 2 + ret void +} + +define amdgpu_ps void @llvm_log2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src) { +; GCN-LABEL: llvm_log2_bf16_s: +; GCN: ; %bb.0: +; GCN-NEXT: v_log_bf16_e32 v2, s0 +; GCN-NEXT: global_store_b16 v[0:1], v2, off +; GCN-NEXT: s_endpgm + %log = call bfloat @llvm.log2.bf16(bfloat %src) + store bfloat %log, ptr addrspace(1) %out, align 2 + ret void +} + +declare bfloat @llvm.log2.bf16(bfloat) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.bf16.ll new file mode 100644 index 0000000000000..a8b2077f5a35b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.bf16.ll @@ -0,0 +1,33 @@ +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s +; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s + +; FIXME: GlobalISel does not work with bf16 + +declare bfloat @llvm.amdgcn.log.bf16(bfloat) #0 + +; GCN-LABEL: {{^}}log_bf16: +; GCN: v_log_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}} +define amdgpu_kernel void @log_bf16(ptr addrspace(1) %out, bfloat %src) #1 { + %log = call bfloat @llvm.amdgcn.log.bf16(bfloat %src) #0 + store bfloat %log, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}log_bf16_constant_4 +; GCN: v_log_bf16_e32 v0, 4.0 +define amdgpu_kernel void @log_bf16_constant_4(ptr addrspace(1) %out) #1 { + %log = call bfloat @llvm.amdgcn.log.bf16(bfloat 4.0) #0 + store bfloat %log, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}log_bf16_constant_100 +; GCN: v_log_bf16_e32 {{v[0-9]+}}, 0x42c8 +define amdgpu_kernel void @log_bf16_constant_100(ptr addrspace(1) %out) #1 { + %log = call bfloat @llvm.amdgcn.log.bf16(bfloat 100.0) #0 + store bfloat %log, ptr addrspace(1) %out, align 2 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.bf16.ll new file mode 100644 index 0000000000000..5bd9fa6f23aa0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.bf16.ll @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GFX-SDAG-TRUE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GFX-SDAG-FAKE16 %s +; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GFX-GISEL-TRUE16 %s +; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GFX-GISEL-FAKE16 %s + +define bfloat @v_log2_bf16(bfloat %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %result = call bfloat @llvm.log2.bf16(bfloat %in) + ret bfloat %result +} + +define bfloat @v_log2_fabs_bf16(bfloat %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fabs_bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, |v0.l| +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fabs_bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, |v0| +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fabs = call bfloat @llvm.fabs.bf16(bfloat %in) + %result = call bfloat @llvm.log2.bf16(bfloat %fabs) + ret bfloat %result +} + +define bfloat @v_log2_fneg_fabs_bf16(bfloat %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_fabs_bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -|v0.l| +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_fabs_bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -|v0| +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fabs = call bfloat @llvm.fabs.bf16(bfloat %in) + %fneg.fabs = fneg bfloat %fabs + %result = call bfloat @llvm.log2.bf16(bfloat %fneg.fabs) + ret bfloat %result +} + +define bfloat @v_log2_fneg_bf16(bfloat %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fneg = fneg bfloat %in + %result = call bfloat @llvm.log2.bf16(bfloat %fneg) + ret bfloat %result +} + +define bfloat @v_log2_bf16_fast(bfloat %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_bf16_fast: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_bf16_fast: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %result = call fast bfloat @llvm.log2.bf16(bfloat %in) + ret bfloat %result +} + +define <2 x bfloat> @v_log2_v2bf16(<2 x bfloat> %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_v2bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v0.h +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_v2bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0 +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1 +; GFX-SDAG-FAKE16-NEXT: v_nop +; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %in) + ret <2 x bfloat> %result +} + +define <2 x bfloat> @v_log2_fabs_v2bf16(<2 x bfloat> %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fabs_v2bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15 +; GFX-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v1.l +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v2.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fabs_v2bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15 +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0 +; GFX-SDAG-FAKE16-NEXT: v_nop +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in) + %result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fabs) + ret <2 x bfloat> %result +} + +define <2 x bfloat> @v_log2_fneg_fabs_v2bf16(<2 x bfloat> %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_fabs_v2bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15 +; GFX-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v1.l +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.h, -v2.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_fabs_v2bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 +; GFX-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15 +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v1, -v1 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0 +; GFX-SDAG-FAKE16-NEXT: v_nop +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in) + %fneg.fabs = fneg <2 x bfloat> %fabs + %result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fneg.fabs) + ret <2 x bfloat> %result +} + +define <2 x bfloat> @v_log2_fneg_v2bf16(<2 x bfloat> %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_v2bf16: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.h, -v0.h +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_v2bf16: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0 +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v1, -v1 +; GFX-SDAG-FAKE16-NEXT: v_nop +; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %fneg = fneg <2 x bfloat> %in + %result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fneg) + ret <2 x bfloat> %result +} + +define <2 x bfloat> @v_log2_v2bf16_fast(<2 x bfloat> %in) { +; GFX-SDAG-TRUE16-LABEL: v_log2_v2bf16_fast: +; GFX-SDAG-TRUE16: ; %bb.0: +; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v0.h +; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l +; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31] +; +; GFX-SDAG-FAKE16-LABEL: v_log2_v2bf16_fast: +; GFX-SDAG-FAKE16: ; %bb.0: +; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 +; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0 +; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1 +; GFX-SDAG-FAKE16-NEXT: v_nop +; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31] + %result = call fast <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %in) + ret <2 x bfloat> %result +} + +declare bfloat @llvm.log2.bf16(bfloat) #0 +declare <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat>) #0 +declare bfloat @llvm.fabs.bf16(bfloat) #0 +declare <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat>) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index 467418874592a..0f5ce56f1a2cf 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -208,6 +208,51 @@ v_rsq_bf16 v5, src_scc v_rsq_bf16 v127, 0x8000 // GFX1250: v_rsq_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf6,0xfe,0x7e,0x00,0x80,0x00,0x00] +v_log_bf16 v5, v1 +// GFX1250: v_log_bf16_e32 v5, v1 ; encoding: [0x01,0xf9,0x0a,0x7e] + +v_log_bf16 v5, v127 +// GFX1250: v_log_bf16_e32 v5, v127 ; encoding: [0x7f,0xf9,0x0a,0x7e] + +v_log_bf16 v5, s1 +// GFX1250: v_log_bf16_e32 v5, s1 ; encoding: [0x01,0xf8,0x0a,0x7e] + +v_log_bf16 v5, s105 +// GFX1250: v_log_bf16_e32 v5, s105 ; encoding: [0x69,0xf8,0x0a,0x7e] + +v_log_bf16 v5, vcc_lo +// GFX1250: v_log_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf8,0x0a,0x7e] + +v_log_bf16 v5, vcc_hi +// GFX1250: v_log_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf8,0x0a,0x7e] + +v_log_bf16 v5, ttmp15 +// GFX1250: v_log_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf8,0x0a,0x7e] + +v_log_bf16 v5, m0 +// GFX1250: v_log_bf16_e32 v5, m0 ; encoding: [0x7d,0xf8,0x0a,0x7e] + +v_log_bf16 v5, exec_lo +// GFX1250: v_log_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf8,0x0a,0x7e] + +v_log_bf16 v5, exec_hi +// GFX1250: v_log_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf8,0x0a,0x7e] + +v_log_bf16 v5, null +// GFX1250: v_log_bf16_e32 v5, null ; encoding: [0x7c,0xf8,0x0a,0x7e] + +v_log_bf16 v5, -1 +// GFX1250: v_log_bf16_e32 v5, -1 ; encoding: [0xc1,0xf8,0x0a,0x7e] + +v_log_bf16 v5, 0.5 +// GFX1250: v_log_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf8,0x0a,0x7e] + +v_log_bf16 v5, src_scc +// GFX1250: v_log_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf8,0x0a,0x7e] + +v_log_bf16 v127, 0x8000 +// GFX1250: v_log_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 1d90f3fe345a5..9dd11e6249b27 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -220,6 +220,54 @@ v_rsq_bf16 v127, 0x8000 v_rsq_bf16 v5.h, v1.h // GFX1250: v_rsq_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf7,0x0a,0x7f] +v_log_bf16 v5, v1 +// GFX1250: v_log_bf16_e32 v5, v1 ; encoding: [0x01,0xf9,0x0a,0x7e] + +v_log_bf16 v5, v127 +// GFX1250: v_log_bf16_e32 v5, v127 ; encoding: [0x7f,0xf9,0x0a,0x7e] + +v_log_bf16 v5, s1 +// GFX1250: v_log_bf16_e32 v5, s1 ; encoding: [0x01,0xf8,0x0a,0x7e] + +v_log_bf16 v5, s105 +// GFX1250: v_log_bf16_e32 v5, s105 ; encoding: [0x69,0xf8,0x0a,0x7e] + +v_log_bf16 v5, vcc_lo +// GFX1250: v_log_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf8,0x0a,0x7e] + +v_log_bf16 v5, vcc_hi +// GFX1250: v_log_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf8,0x0a,0x7e] + +v_log_bf16 v5, ttmp15 +// GFX1250: v_log_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf8,0x0a,0x7e] + +v_log_bf16 v5, m0 +// GFX1250: v_log_bf16_e32 v5, m0 ; encoding: [0x7d,0xf8,0x0a,0x7e] + +v_log_bf16 v5, exec_lo +// GFX1250: v_log_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf8,0x0a,0x7e] + +v_log_bf16 v5, exec_hi +// GFX1250: v_log_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf8,0x0a,0x7e] + +v_log_bf16 v5, null +// GFX1250: v_log_bf16_e32 v5, null ; encoding: [0x7c,0xf8,0x0a,0x7e] + +v_log_bf16 v5, -1 +// GFX1250: v_log_bf16_e32 v5, -1 ; encoding: [0xc1,0xf8,0x0a,0x7e] + +v_log_bf16 v5, 0.5 +// GFX1250: v_log_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf8,0x0a,0x7e] + +v_log_bf16 v5, src_scc +// GFX1250: v_log_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf8,0x0a,0x7e] + +v_log_bf16 v127, 0x8000 +// GFX1250: v_log_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_log_bf16 v5.h, v1.h +// GFX1250: v_log_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf9,0x0a,0x7f] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index dd49e49e4b20b..3882e43b5daf4 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -226,6 +226,62 @@ v_rsq_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_rsq_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf6,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_log_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_mirror +// GFX1250: v_log_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_half_mirror +// GFX1250: v_log_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shl:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shl:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shr:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shr:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_ror:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_ror:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_log_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_log_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_log_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_log_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 3415e76188e78..2f849b15edee9 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -242,6 +242,66 @@ v_rsq_bf16 v5.h, v1.h quad_perm:[3,2,1,0] // GFX1250: v_rsq_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf6,0x0a,0x7f,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_log_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_mirror +// GFX1250: v_log_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_half_mirror +// GFX1250: v_log_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shl:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shl:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shr:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_shr:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_ror:1 +// GFX1250: v_log_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_ror:15 +// GFX1250: v_log_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_log_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_log_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_log_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_log_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5.h, v1.h quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index 5cce927831f12..85cf08bdb3a31 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -50,6 +50,18 @@ v_rsq_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_rsq_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf6,0xfe,0x7e,0x7f,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_log_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index 5ba421bf014ac..d9b320ac6c094 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -66,6 +66,22 @@ v_rsq_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_rsq_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_log_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 31daff336fd48..0d4de4c8c877a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -262,6 +262,51 @@ v_rsq_bf16_e64 v5, src_scc mul:4 v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 // GFX1250: v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfb,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] +v_log_bf16_e64 v5, v1 +// GFX1250: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00] + +v_log_bf16_e64 v5, v255 +// GFX1250: v_log_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x00,0x00] + +v_log_bf16_e64 v5, s1 +// GFX1250: v_log_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x00,0x00,0x00] + +v_log_bf16_e64 v5, s105 +// GFX1250: v_log_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfc,0xd5,0x69,0x00,0x00,0x00] + +v_log_bf16_e64 v5, vcc_lo +// GFX1250: v_log_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x6a,0x00,0x00,0x00] + +v_log_bf16_e64 v5, vcc_hi +// GFX1250: v_log_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x6b,0x00,0x00,0x00] + +v_log_bf16_e64 v5, ttmp15 +// GFX1250: v_log_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfc,0xd5,0x7b,0x00,0x00,0x00] + +v_log_bf16_e64 v5, m0 +// GFX1250: v_log_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfc,0xd5,0x7d,0x00,0x00,0x00] + +v_log_bf16_e64 v5, exec_lo +// GFX1250: v_log_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x7e,0x00,0x00,0x00] + +v_log_bf16_e64 v5, exec_hi +// GFX1250: v_log_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00] + +v_log_bf16_e64 v5, null +// GFX1250: v_log_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfc,0xd5,0x7c,0x00,0x00,0x00] + +v_log_bf16_e64 v5, -1 +// GFX1250: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00] + +v_log_bf16_e64 v5, 0.5 mul:2 +// GFX1250: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08] + +v_log_bf16_e64 v5, src_scc mul:4 +// GFX1250: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10] + +v_log_bf16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16_e64 v5, v1 // GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index d270a34a30275..8bf5d242660b6 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -274,6 +274,54 @@ v_rsq_bf16_e64 v255, -|0x8000| clamp div:2 v_rsq_bf16 v5.h, v128.h // GFX1250: v_rsq_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfb,0xd5,0x80,0x01,0x00,0x00] +v_log_bf16_e64 v5, v1 +// GFX1250: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00] + +v_log_bf16_e64 v5, v255 +// GFX1250: v_log_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x00,0x00] + +v_log_bf16_e64 v5, s1 +// GFX1250: v_log_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x00,0x00,0x00] + +v_log_bf16_e64 v5, s105 +// GFX1250: v_log_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfc,0xd5,0x69,0x00,0x00,0x00] + +v_log_bf16_e64 v5, vcc_lo +// GFX1250: v_log_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x6a,0x00,0x00,0x00] + +v_log_bf16_e64 v5, vcc_hi +// GFX1250: v_log_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x6b,0x00,0x00,0x00] + +v_log_bf16_e64 v5, ttmp15 +// GFX1250: v_log_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfc,0xd5,0x7b,0x00,0x00,0x00] + +v_log_bf16_e64 v5, m0 +// GFX1250: v_log_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfc,0xd5,0x7d,0x00,0x00,0x00] + +v_log_bf16_e64 v5, exec_lo +// GFX1250: v_log_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x7e,0x00,0x00,0x00] + +v_log_bf16_e64 v5, exec_hi +// GFX1250: v_log_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00] + +v_log_bf16_e64 v5, null +// GFX1250: v_log_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfc,0xd5,0x7c,0x00,0x00,0x00] + +v_log_bf16_e64 v5, -1 +// GFX1250: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00] + +v_log_bf16_e64 v5, 0.5 mul:2 +// GFX1250: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08] + +v_log_bf16_e64 v5, src_scc mul:4 +// GFX1250: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10] + +v_log_bf16_e64 v255, -|0x8000| clamp div:2 +// GFX1250: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +v_log_bf16 v5.h, v128.h +// GFX1250: v_log_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00] + v_cvt_f32_bf16_e64 v5, v1 // GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index d5b12002cb5ba..4231fcf7c5e92 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -226,6 +226,62 @@ v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask // GFX1250: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_log_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 70961a901ffb7..1a094e285e730 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -242,6 +242,66 @@ v_rsq_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0] // GFX1250: v_rsq_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfb,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_log_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_log_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5.h, v128.h quad_perm:[3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index c8469f4188738..f6a2103ed9077 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -66,6 +66,22 @@ v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_rsq_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index f33ee9be04f82..5a1b1414dda37 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -82,6 +82,26 @@ v_rsq_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_rsq_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfb,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_log_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_log_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_log_bf16_e64_dpp v5.h, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index 35b4ebc6f87f8..c318dd7fc4ee0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -2,6 +2,9 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xff,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf +# GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] + 0xc1,0x3a,0x08,0x7e # GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e] @@ -278,6 +281,69 @@ 0x81,0xf7,0x0a,0x7f # GFX1250-REAL16: v_rsq_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf7,0x0a,0x7f] +0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e32 v127.l, 0x8000 ; encoding: [0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xf8,0xfe,0x7e,0x00,0x80,0x00,0x00] + +0xc1,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, -1 ; encoding: [0xc1,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, -1 ; encoding: [0xc1,0xf8,0x0a,0x7e] + +0xf0,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, 0.5 ; encoding: [0xf0,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xf8,0x0a,0x7e] + +0x7f,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, exec_hi ; encoding: [0x7f,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xf8,0x0a,0x7e] + +0x7e,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, exec_lo ; encoding: [0x7e,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xf8,0x0a,0x7e] + +0x7d,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, m0 ; encoding: [0x7d,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, m0 ; encoding: [0x7d,0xf8,0x0a,0x7e] + +0x7c,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, null ; encoding: [0x7c,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, null ; encoding: [0x7c,0xf8,0x0a,0x7e] + +0x01,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, s1 ; encoding: [0x01,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, s1 ; encoding: [0x01,0xf8,0x0a,0x7e] + +0x69,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, s105 ; encoding: [0x69,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, s105 ; encoding: [0x69,0xf8,0x0a,0x7e] + +0xfd,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, src_scc ; encoding: [0xfd,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, src_scc ; encoding: [0xfd,0xf8,0x0a,0x7e] + +0x7b,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xf8,0x0a,0x7e] + +0x01,0xf9,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, v1.l ; encoding: [0x01,0xf9,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, v1 ; encoding: [0x01,0xf9,0x0a,0x7e] + +0x7f,0xf9,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, v127.l ; encoding: [0x7f,0xf9,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, v127 ; encoding: [0x7f,0xf9,0x0a,0x7e] + +0x6b,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xf8,0x0a,0x7e] + +0x6a,0xf8,0x0a,0x7e +# GFX1250-REAL16: v_log_bf16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xf8,0x0a,0x7e] +# GFX1250-FAKE16: v_log_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xf8,0x0a,0x7e] + +0x81,0xf9,0x0a,0x7f +# GFX1250-REAL16: v_log_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xf9,0x0a,0x7f] + 0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00 # GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index 0f98bced09d36..22ed09e957de7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -238,6 +238,65 @@ 0xfa,0xf6,0x0a,0x7f,0x81,0x1b,0x00,0xff # GFX1250-REAL16: v_rsq_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf6,0x0a,0x7f,0x81,0x1b,0x00,0xff] +0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30 +# GFX1250-REAL16: v_log_bf16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +# GFX1250-FAKE16: v_log_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xf8,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +0xfa,0xf8,0x0a,0x7e,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x41,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x40,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x21,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x50,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0xf8,0x0a,0x7e,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x01,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x11,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +0xfa,0xf8,0x0a,0x7e,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xf8,0x0a,0x7e,0x01,0x60,0x09,0x13] + +0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff +# GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf8,0x0a,0x7f,0x81,0x1b,0x00,0xff] + 0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30 # GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index 1be1451c0f3ed..d8458e8808b39 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -65,6 +65,21 @@ 0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_rsq_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05] +0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +0xe9,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xea,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf8,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05] + 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index 5285033fb34b9..d1a7158ce582e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -258,6 +258,70 @@ # GFX1250-REAL16: v_rsq_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfb,0xd5,0x80,0x01,0x00,0x00] # GFX1250-FAKE16: v_rsq_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfb,0xd5,0x80,0x01,0x00,0x00] +0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v255.l, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v255, -|0x8000| clamp div:2 ; encoding: [0xff,0x81,0xfc,0xd5,0xff,0x00,0x00,0x38,0x00,0x80,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xfc,0xd5,0xc1,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08] +# GFX1250-FAKE16: v_log_bf16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0xf0,0x00,0x00,0x08] + +0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x7e,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x7e,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x7e,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x7d,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xfc,0xd5,0x7d,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xfc,0xd5,0x7d,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x7c,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, null ; encoding: [0x05,0x00,0xfc,0xd5,0x7c,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, null ; encoding: [0x05,0x00,0xfc,0xd5,0x7c,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x69,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xfc,0xd5,0x69,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xfc,0xd5,0x69,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10] +# GFX1250-FAKE16: v_log_bf16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xfc,0xd5,0xfd,0x00,0x00,0x10] + +0x05,0x00,0xfc,0xd5,0x7b,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xfc,0xd5,0x7b,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xfc,0xd5,0x7b,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0xff,0x01,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x6b,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x6b,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x6b,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0x6a,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x6a,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xfc,0xd5,0x6a,0x00,0x00,0x00] + +0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64 v5.h, v128.h op_sel:[1,1] ; encoding: [0x05,0x48,0xfc,0xd5,0x80,0x01,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64 v5, v128 ; encoding: [0x05,0x00,0xfc,0xd5,0x80,0x01,0x00,0x00] + 0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00 # GFX1250: v_cvt_f32_bf8_e64 v1, 3 ; encoding: [0x01,0x00,0xed,0xd5,0x83,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index d546ddff9f28c..56f65d0711664 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -62,6 +62,66 @@ # GFX1250-REAL16: v_rsq_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfb,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfb,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 +# GFX1250-REAL16: v_log_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xfc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x48,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v128 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xfc,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] + 0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xf9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index ae5d331b2eea0..9ff9e54c1b40c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -22,6 +22,26 @@ # GFX1250-REAL16: v_rsq_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfb,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] # GFX1250-FAKE16: v_rsq_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfb,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX1250-REAL16: v_log_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xfc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0xfc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xfc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +0x05,0x48,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_log_bf16_e64_dpp v5.h, v128.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_log_bf16_e64_dpp v5, v128 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xfc,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + 0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX1250-REAL16: v_rcp_bf16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX1250-FAKE16: v_rcp_bf16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xf9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]