diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 4758f6053ccb6..14c1746716cdd 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -616,8 +616,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") -TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUc", "nc", "bitop3-insts") -TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUc", "nc", "bitop3-insts") +TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUi", "nc", "bitop3-insts") +TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl index f1259ef678f1e..8251d6c213e3d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl @@ -1673,7 +1673,7 @@ void test_cvt_scalef32_sr_fp8_f32(global unsigned *out, float src, uint seed, fl // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i8 1) +// CHECK-NEXT: [[TMP3:%.*]] = call i32 
@llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 1) // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 // CHECK-NEXT: ret void @@ -1696,7 +1696,7 @@ void test_bitop3_b32(global uint* out, uint a, uint b, uint c) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(5) [[A_ADDR]], align 2 // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr addrspace(5) [[B_ADDR]], align 2 // CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[C_ADDR]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i8 1) +// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i32 1) // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[TMP4]], align 2 // CHECK-NEXT: ret void diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 112c26d20db14..92418b9104ad1 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic< def int_amdgcn_bitop3 : DefaultAttrsIntrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>; } // TargetPrefix = "amdgcn" diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index d8eb9d155315a..fc8c12a674e46 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1271,7 +1271,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> { let Validator = "isUInt<2>"; } -def BitOp3 : CustomOperand<i8, 1, "BitOp3">; +def BitOp3 : CustomOperand<i32, 1, "BitOp3">; def bitop3_0 : DefaultOperand<BitOp3, 0>; class KImmFPOperand<ValueType vt> : 
ImmOperand<vt> { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index ff9376e635af9..a00785bf29c77 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1291,28 +1291,28 @@ let SubtargetPredicate = isGFX12Plus in { let SubtargetPredicate = HasBitOp3Insts in { let isReMaterializable = 1 in { defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", - VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>, + VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i32]>>, VOP3_OPSEL>>; defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32", - VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>; + VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>; } def : GCNPat< - (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)), + (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)), (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3)) >; def : GCNPat< - (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)), + (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) >; def : GCNPat< - (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)), + (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)), (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3)) >; def : GCNPat< - (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)), + (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) >; } // End SubtargetPredicate = HasBitOp3Insts diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll index ff2f4db0d7a5f..b6232cbc38496 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll @@ -2,15 +2,15 @@ ; RUN: llc -march=amdgcn -mcpu=gfx950 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s -declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i8) -declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i8) +declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32) +declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32) define amdgpu_ps float @bitop3_b32_vvv(i32 %a, i32 %b, i32 %c) { ; GCN-LABEL: bitop3_b32_vvv: ; GCN: ; %bb.0: ; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xf ; GCN-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 15) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 15) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -20,7 +20,7 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) { ; GCN: ; %bb.0: ; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x10 ; GCN-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 16) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 16) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -31,7 +31,7 @@ define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11 ; GCN-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 17) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -43,7 +43,7 @@ define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12 ; GCN-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 
%c, i8 18) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -60,7 +60,7 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, i32 %b) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i8 19) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -79,7 +79,7 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i8 20) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -102,7 +102,7 @@ define amdgpu_ps float @bitop3_b32_iii() { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i8 21) + %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } @@ -112,7 +112,7 @@ define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) { ; GCN: ; %bb.0: ; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf ; GCN-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 15) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } @@ -122,7 +122,7 @@ define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) { ; GCN: ; %bb.0: ; GCN-NEXT: v_bitop3_b16 
v0, s0, v0, v1 bitop3:0x10 ; GCN-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 16) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } @@ -133,7 +133,7 @@ define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11 ; GCN-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 17) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } @@ -145,7 +145,7 @@ define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c) ; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12 ; GCN-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 18) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } @@ -162,7 +162,7 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i8 19) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } @@ -181,7 +181,7 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i8 20) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20) %ret_cast = bitcast i16 %ret to half 
ret half %ret_cast } @@ -203,7 +203,7 @@ define amdgpu_ps half @bitop3_b16_iii() { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15 ; GFX950-GISEL-NEXT: ; return to shader part epilog - %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i8 21) + %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21) %ret_cast = bitcast i16 %ret to half ret half %ret_cast }