[AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit vreg #140946

broxigarchen · 2025-05-21T18:48:08Z

Select vgpr16 for 16-bit inline asm vregs in true16 mode.

llvmbot · 2025-05-21T18:49:51Z

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

Select vgpr16 for 16-bit inline asm registers.

Full diff: https://github.com/llvm/llvm-project/pull/140946.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4-1)
(added) llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll (+48)
(added) llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll (+48)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ba7e11a853347..a4b62454e782d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16062,7 +16062,10 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
     case 'v':
       switch (BitWidth) {
       case 16:
-        RC = &AMDGPU::VGPR_32RegClass;
+        if (Subtarget->useRealTrue16Insts())
+          RC = &AMDGPU::VGPR_16RegClass;
+        else
+          RC = &AMDGPU::VGPR_32RegClass;
         break;
       default:
         RC = TRI->getVGPRClassForBitWidth(BitWidth);
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll
new file mode 100644
index 0000000000000..0f268c796c695
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+; GFX11-LABEL: {{^}}s_input_output_i16:
+; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1
+; GFX11: ; use s[[REG]]
+define amdgpu_kernel void @s_input_output_i16() #0 {
+  %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
+  ret void
+}
+
+; GFX11-LABEL: {{^}}s_input_output_f16:
+; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1
+; GFX11: ; use s[[REG]]
+define amdgpu_kernel void @s_input_output_f16() #0 {
+  %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
+  tail call void asm sideeffect "; use $0", "s"(half %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}v_input_output_f16:
+; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1
+; GFX11: ; use v[[REG]]
+define amdgpu_kernel void @v_input_output_f16() #0 {
+  %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(half %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}v_input_output_i16:
+; GFX11: v_mov_b32 v[[REG:[0-9]+]], -1
+; GFX11: ; use v[[REG]]
+define amdgpu_kernel void @v_input_output_i16() #0 {
+  %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(i16 %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr:
+; GFX11: v_mov_b32_e32 v0, 0xffff
+; GFX11: ; use v0
+define amdgpu_kernel void @i16_imm_input_phys_vgpr() {
+entry:
+  call void asm sideeffect "; use $0 ", "{v0}"(i16 65535)
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll
new file mode 100644
index 0000000000000..908fb840e8d2c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+; GFX11-LABEL: {{^}}s_input_output_i16:
+; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1
+; GFX11: ; use s[[REG]]
+define amdgpu_kernel void @s_input_output_i16() #0 {
+  %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
+  ret void
+}
+
+; GFX11-LABEL: {{^}}s_input_output_f16:
+; GFX11: s_mov_b32 s[[REG:[0-9]+]], -1
+; GFX11: ; use s[[REG]]
+define amdgpu_kernel void @s_input_output_f16() #0 {
+  %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
+  tail call void asm sideeffect "; use $0", "s"(half %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}v_input_output_f16:
+; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1
+; GFX11: ; use v[[REG]]
+define amdgpu_kernel void @v_input_output_f16() #0 {
+  %v = tail call half asm sideeffect "v_mov_b16 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(half %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}v_input_output_i16:
+; GFX11: v_mov_b16 v[[REG:[0-9]+.(l|h)]], -1
+; GFX11: ; use v[[REG]]
+define amdgpu_kernel void @v_input_output_i16() #0 {
+  %v = tail call i16 asm sideeffect "v_mov_b16 $0, -1", "=v"() #0
+  tail call void asm sideeffect "; use $0", "v"(i16 %v)
+  ret void
+}
+
+; GFX11-LABEL: {{^}}i16_imm_input_phys_vgpr:
+; GFX11: v_mov_b16_e32 v0.l, -1
+; GFX11: ; use v0
+define amdgpu_kernel void @i16_imm_input_phys_vgpr() {
+entry:
+  call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535)
+  ret void
+}
+
+attributes #0 = { nounwind }

arsenm · 2025-05-21T19:32:08Z

llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll

@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefixes=GFX11 %s


Why do these need to be separate files? Also, the RUN line doesn't need -verify-machineinstrs or the stderr redirect.

The inline asm differs between the two files: v_mov_b16 vs v_mov_b32.

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

rampitec · 2025-05-21T19:35:17Z

llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll

+; GFX11: ; use v0.l
+define amdgpu_kernel void @i16_imm_input_phys_vgpr() {
+entry:
+  call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535)


Can you please add one more test with v0.h?

And do we need a constraint to specify specifically an l or h for a virtual register?

Added a .h case

And do we need a constraint to specify specifically an l or h for a virtual register?

I do not think it is practically needed. At least it is not needed for correctness at this point.

rampitec

LGTM, thanks!

rampitec · 2025-05-21T20:30:49Z

llvm/test/CodeGen/AMDGPU/inlineasm-16-true16.ll

+; GFX11: ; use v0.l
+define amdgpu_kernel void @i16_imm_input_phys_vgpr() {
+entry:
+  call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535)


And do we need a constraint to specify specifically an l or h for a virtual register?

I do not think it is practically needed. At least it is not needed for correctness at this point.

shiltian · 2025-05-21T20:38:12Z

llvm/test/CodeGen/AMDGPU/inlineasm-16-fake16.ll

@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s


Can you auto-generate the check lines?

jayfoad · 2025-05-22T08:38:58Z

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -16062,7 +16062,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
    case 'v':
      switch (BitWidth) {


switch seems overkill here. Could just handle it with:

if (BitWidth == 16 && !Subtarget->useRealTrue16Insts()) BitWidth = 32;

Or even move the handling for the BitWidth == 16 case inside getVGPRClassForBitWidth?

This reminds me we need constraints for the aligned and unaligned versions of register classes

broxigarchen changed the title ~~16bit for asm inline reg~~ [AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit reg May 21, 2025

broxigarchen marked this pull request as ready for review May 21, 2025 18:49

broxigarchen requested review from Sisyph and rampitec May 21, 2025 18:49

llvmbot added the backend:AMDGPU label May 21, 2025

broxigarchen requested review from arsenm and kosarev May 21, 2025 18:49

broxigarchen force-pushed the main-fix-asm-inline-1 branch from 47427c7 to d77dc1f Compare May 21, 2025 18:50

broxigarchen changed the title ~~[AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit reg~~ [AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit vreg May 21, 2025

broxigarchen mentioned this pull request May 21, 2025

[AMDGPU][True16] set true16 mode as default on gfx110x #140736

Merged

arsenm reviewed May 21, 2025

View reviewed changes

rampitec reviewed May 21, 2025

View reviewed changes

broxigarchen force-pushed the main-fix-asm-inline-1 branch from d77dc1f to f3a655a Compare May 21, 2025 20:22

16bit for asm inline reg

c1b6d35

broxigarchen force-pushed the main-fix-asm-inline-1 branch from f3a655a to c1b6d35 Compare May 21, 2025 20:27

rampitec approved these changes May 21, 2025

View reviewed changes

shiltian reviewed May 21, 2025

View reviewed changes

arsenm approved these changes May 21, 2025

View reviewed changes

auto generate check line

67d4d9f

shiltian approved these changes May 21, 2025

View reviewed changes

broxigarchen merged commit 7f62800 into llvm:main May 21, 2025
8 of 10 checks passed

jayfoad reviewed May 22, 2025

View reviewed changes

		@@ -0,0 +1,48 @@
		; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s 2>&1 \| FileCheck -enable-var-scope -check-prefixes=GFX11 %s

		@@ -16062,7 +16062,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
		case 'v':
		switch (BitWidth) {

[AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit vreg #140946

[AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit vreg #140946

Uh oh!

Conversation

broxigarchen commented May 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 21, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

rampitec left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

broxigarchen commented May 21, 2025 •

edited

Loading