From c1b6d3520437838ab3ba571fa52992a1c35361ef Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 21 May 2025 14:47:40 -0400 Subject: [PATCH 1/2] 16bit for asm inline reg --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +- .../CodeGen/AMDGPU/inlineasm-16-fake16.ll | 48 ++++++++++++++++ .../CodeGen/AMDGPU/inlineasm-16-true16.ll | 57 +++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll create mode 100644 llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ba7e11a853347..2d337fafe6dc2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16062,7 +16062,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_, case 'v': switch (BitWidth) { case 16: - RC = &AMDGPU::VGPR_32RegClass; + RC = Subtarget->useRealTrue16Insts() ? &AMDGPU::VGPR_16RegClass + : &AMDGPU::VGPR_32RegClass; break; default: RC = TRI->getVGPRClassForBitWidth(BitWidth); diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll new file mode 100644 index 0000000000000..ecc3d2326f6e2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s + +; GFX11-LABEL: {{^}}s_input_output_i16: +; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 +; GFX11: ; use s[[REG]] +define amdgpu_kernel void @s_input_output_i16() #0 { + %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() + tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 + ret void +} + +; GFX11-LABEL: {{^}}s_input_output_f16: +; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 +; GFX11: ; use s[[REG]] +define amdgpu_kernel void @s_input_output_f16() #0 { + %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 + tail call void asm sideeffect "; use $0", "s"(half %v) + ret void +} + +; GFX11-LABEL: {{^}}v_input_output_f16: +; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1 +; GFX11: ; use v[[REG]] +define amdgpu_kernel void @v_input_output_f16() #0 { + %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 + tail call void asm sideeffect "; use $0", "v"(half %v) + ret void +} + +; GFX11-LABEL: {{^}}v_input_output_i16: +; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1 +; GFX11: ; use v[[REG]] +define amdgpu_kernel void @v_input_output_i16() #0 { + %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 + tail call void asm sideeffect "; use $0", "v"(i16 %v) + ret void +} + +; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr: +; GFX11: v_mov_b32_e32 v0, 0xffff +; GFX11: ; use v0 +define amdgpu_kernel void @i16_imm_input_phys_vgpr() { +entry: + call void asm sideeffect "; use $0 ", "{v0}"(i16 65535) + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll new file mode 100644 index 0000000000000..17905b9101c7a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll @@ -0,0 +1,57 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s + +; GFX11-LABEL: {{^}}s_input_output_i16: +; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 +; GFX11: ; use s[[REG]] +define amdgpu_kernel void @s_input_output_i16() #0 { + %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() + tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 + ret void +} + +; GFX11-LABEL: {{^}}s_input_output_f16: +; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 +; GFX11: ; use s[[REG]] +define amdgpu_kernel void @s_input_output_f16() #0 { + %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 + tail call void asm sideeffect "; use $0", "s"(half %v) + ret void +} + +; GFX11-LABEL: {{^}}v_input_output_f16: +; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1 +; GFX11: ; use v[[REG]] +define amdgpu_kernel void @v_input_output_f16() #0 { + %v = tail call half asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 + tail call void asm sideeffect "; use $0", "v"(half %v) + ret void +} + +; GFX11-LABEL: {{^}}v_input_output_i16: +; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1 +; GFX11: ; use v[[REG]] +define amdgpu_kernel void @v_input_output_i16() #0 { + %v = tail call i16 asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 + tail call void asm sideeffect "; use $0", "v"(i16 %v) + ret void +} + +; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr_lo: +; GFX11: v_mov_b16_e32 v0.l, -1 +; GFX11: ; use v0.l +define amdgpu_kernel void @i16_imm_input_phys_vgpr_lo() { +entry: + call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535) + ret void +} + +; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr_hi: +; GFX11: v_mov_b16_e32 v0.h, -1 +; GFX11: ; use v0.h +define amdgpu_kernel void @i16_imm_input_phys_vgpr_hi() { +entry: + call void asm sideeffect "; use $0 ", "{v0.h}"(i16 65535) + ret void +} + +attributes #0 = { nounwind } From 67d4d9fd3dd3cbdb352771984ca737f6951ab1b8 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 21 May 2025 16:42:50 -0400 Subject: [PATCH 2/2] auto generate check line --- .../CodeGen/AMDGPU/inlineasm-16-fake16.ll | 61 ++++++++++++---- .../CodeGen/AMDGPU/inlineasm-16-true16.ll | 70 ++++++++++++++----- 2 files changed, 98 insertions(+), 33 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll index ecc3d2326f6e2..d695a1fad604a 100644 --- a/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll +++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll @@ -1,45 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s -; GFX11-LABEL: {{^}}s_input_output_i16: -; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 -; GFX11: ; use s[[REG]] define amdgpu_kernel void @s_input_output_i16() #0 { +; GFX11-LABEL: s_input_output_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: s_mov_b32 s0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 ret void } -; GFX11-LABEL: {{^}}s_input_output_f16: -; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 -; GFX11: ; use s[[REG]] define amdgpu_kernel void @s_input_output_f16() #0 { +; GFX11-LABEL: s_input_output_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: s_mov_b32 s0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 tail call void asm sideeffect "; use $0", "s"(half %v) ret void } -; GFX11-LABEL: {{^}}v_input_output_f16: -; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1 -; GFX11: ; use v[[REG]] define amdgpu_kernel void @v_input_output_f16() #0 { +; GFX11-LABEL: v_input_output_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: v_mov_b32 v0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(half %v) ret void } -; GFX11-LABEL: {{^}}v_input_output_i16: -; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1 -; GFX11: ; use v[[REG]] define amdgpu_kernel void @v_input_output_i16() #0 { +; GFX11-LABEL: v_input_output_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: v_mov_b32 v0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(i16 %v) ret void } -; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr: -; GFX11: v_mov_b32_e32 v0, 0xffff -; GFX11: ; use v0 define amdgpu_kernel void @i16_imm_input_phys_vgpr() { +; GFX11-LABEL: i16_imm_input_phys_vgpr: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm entry: call void asm sideeffect "; use $0 ", "{v0}"(i16 65535) ret void diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll index 17905b9101c7a..5c5dc09f573ee 100644 --- a/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll +++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll @@ -1,54 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s -; GFX11-LABEL: {{^}}s_input_output_i16: -; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 -; GFX11: ; use s[[REG]] define amdgpu_kernel void @s_input_output_i16() #0 { +; GFX11-LABEL: s_input_output_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: s_mov_b32 s0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 ret void } -; GFX11-LABEL: {{^}}s_input_output_f16: -; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1 -; GFX11: ; use s[[REG]] define amdgpu_kernel void @s_input_output_f16() #0 { +; GFX11-LABEL: s_input_output_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: s_mov_b32 s0, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use s0 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 tail call void asm sideeffect "; use $0", "s"(half %v) ret void } -; GFX11-LABEL: {{^}}v_input_output_f16: -; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1 -; GFX11: ; use v[[REG]] define amdgpu_kernel void @v_input_output_f16() #0 { +; GFX11-LABEL: v_input_output_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: v_mov_b16 v0.l, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0.l +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call half asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(half %v) ret void } -; GFX11-LABEL: {{^}}v_input_output_i16: -; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1 -; GFX11: ; use v[[REG]] define amdgpu_kernel void @v_input_output_i16() #0 { +; GFX11-LABEL: v_input_output_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: v_mov_b16 v0.l, -1 +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0.l +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm %v = tail call i16 asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 tail call void asm sideeffect "; use $0", "v"(i16 %v) ret void } -; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr_lo: -; GFX11: v_mov_b16_e32 v0.l, -1 -; GFX11: ; use v0.l define amdgpu_kernel void @i16_imm_input_phys_vgpr_lo() { +; GFX11-LABEL: i16_imm_input_phys_vgpr_lo: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_mov_b16_e32 v0.l, -1 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0.l +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm entry: call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535) ret void } -; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr_hi: -; GFX11: v_mov_b16_e32 v0.h, -1 -; GFX11: ; use v0.h define amdgpu_kernel void @i16_imm_input_phys_vgpr_hi() { +; GFX11-LABEL: i16_imm_input_phys_vgpr_hi: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_mov_b16_e32 v0.h, -1 +; GFX11-NEXT: ;;#ASMSTART +; GFX11-NEXT: ; use v0.h +; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_endpgm entry: call void asm sideeffect "; use $0 ", "{v0.h}"(i16 65535) ret void