# Reviewer notes. This preamble sits before the first "diff --git" header,
# where patch tools skip free-form text, so no hunk line count is affected.
#
# 1. llvm/lib/Target/AMDGPU/SIISelLowering.cpp
#    In SITargetLowering::getRegForInlineAsmConstraint, the 'v' constraint with
#    BitWidth == 16 now selects AMDGPU::VGPR_16RegClass when
#    Subtarget->useRealTrue16Insts() is true, and keeps
#    AMDGPU::VGPR_32RegClass otherwise. Other bit widths still go through
#    TRI->getVGPRClassForBitWidth(BitWidth).
#
# 2. llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll (new file)
#    Runs llc for gfx1100 with -mattr=-real-true16. The expected output prints
#    16-bit "v" operands as whole registers (v0), and the i16 case has a
#    v_and_b32_e32 with 0xffff between the two asm blocks.
#
# 3. llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll (new file)
#    Runs llc for gfx1100 with -mattr=+real-true16. The expected output prints
#    16-bit "v" operands as halves (v0.l), has no masking instruction for the
#    i16 case, and covers the explicit physical constraints {v0.l} and {v0.h}.
#
# The "s" constraint expectations are identical in both test files.
#
# NOTE(review): the line structure of this patch was restored from a copy whose
# newlines had been collapsed; leading indentation and the spacing inside the
# autogenerated CHECK lines are reconstructed -- confirm against the tree
# (or re-run utils/update_llc_test_checks.py) before applying.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ba7e11a853347..2d337fafe6dc2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16062,7 +16062,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
     case 'v':
       switch (BitWidth) {
       case 16:
-        RC = &AMDGPU::VGPR_32RegClass;
+        RC = Subtarget->useRealTrue16Insts() ? &AMDGPU::VGPR_16RegClass
+                                             : &AMDGPU::VGPR_32RegClass;
         break;
       default:
         RC = TRI->getVGPRClassForBitWidth(BitWidth);
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll
new file mode 100644
index 0000000000000..d695a1fad604a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @s_input_output_i16() #0 {
+; GFX11-LABEL: s_input_output_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: s_mov_b32 s0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use s0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
+  ret void
+}
+
+define amdgpu_kernel void @s_input_output_f16() #0 {
+; GFX11-LABEL: s_input_output_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: s_mov_b32 s0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use s0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
+  tail call void asm sideeffect "; use $0", "s"(half %v)
+  ret void
+}
+
+define amdgpu_kernel void @v_input_output_f16() #0 {
+; GFX11-LABEL: v_input_output_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: v_mov_b32 v0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(half %v)
+  ret void
+}
+
+define amdgpu_kernel void @v_input_output_i16() #0 {
+; GFX11-LABEL: v_input_output_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: v_mov_b32 v0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(i16 %v)
+  ret void
+}
+
+define amdgpu_kernel void @i16_imm_input_phys_vgpr() {
+; GFX11-LABEL: i16_imm_input_phys_vgpr:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+entry:
+  call void asm sideeffect "; use $0 ", "{v0}"(i16 65535)
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll
new file mode 100644
index 0000000000000..5c5dc09f573ee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @s_input_output_i16() #0 {
+; GFX11-LABEL: s_input_output_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: s_mov_b32 s0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use s0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
+  ret void
+}
+
+define amdgpu_kernel void @s_input_output_f16() #0 {
+; GFX11-LABEL: s_input_output_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: s_mov_b32 s0, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use s0
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
+  tail call void asm sideeffect "; use $0", "s"(half %v)
+  ret void
+}
+
+define amdgpu_kernel void @v_input_output_f16() #0 {
+; GFX11-LABEL: v_input_output_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: v_mov_b16 v0.l, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0.l
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call half asm sideeffect "v_mov_b16 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(half %v)
+  ret void
+}
+
+define amdgpu_kernel void @v_input_output_i16() #0 {
+; GFX11-LABEL: v_input_output_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: v_mov_b16 v0.l, -1
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0.l
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+  %v = tail call i16 asm sideeffect "v_mov_b16 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(i16 %v)
+  ret void
+}
+
+define amdgpu_kernel void @i16_imm_input_phys_vgpr_lo() {
+; GFX11-LABEL: i16_imm_input_phys_vgpr_lo:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b16_e32 v0.l, -1
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0.l
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+entry:
+  call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535)
+  ret void
+}
+
+define amdgpu_kernel void @i16_imm_input_phys_vgpr_hi() {
+; GFX11-LABEL: i16_imm_input_phys_vgpr_hi:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b16_e32 v0.h, -1
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ; use v0.h
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm
+entry:
+  call void asm sideeffect "; use $0 ", "{v0.h}"(i16 65535)
+  ret void
+}
+
+attributes #0 = { nounwind }