[AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` #139185

shiltian · 2025-05-09T00:50:21Z

Even though these two instructions were introduced in gfx11 and support true16, we can't really use them in scenarios where both true16 and fake16 instructions exist and the opsel feature is required. In such cases, we still need to provide opsel support for fake16.

Fixes SWDEV-531225.

shiltian · 2025-05-09T00:50:36Z

[AMDGPU][Fake16] Support OPSEL for v_cvt_f16_f32 and v_cvt_f32_f16 #139185 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-05-09T00:52:17Z

@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

Changes

Patch is 31.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139185.diff

13 Files Affected:

(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+10-4)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir (+6-6)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+2-2)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+2-2)

diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 352a3f9c2d27f..5522d89855332 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -261,6 +261,11 @@ foreach vt = Reg32Types.types in {
   >;
 }
 
+let HasOpSel = 1 in {
+  def VOP_F16_F32_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F16_F32>;
+  def VOP_F32_F16_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F32_F16>;
+} // End HasOpSel = 1
+
 let isReMaterializable = 1 in {
 let SchedRW = [WriteDoubleCvt] in {
 // OMod clears exceptions when set in this instruction
@@ -301,15 +306,16 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
     defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
   let OtherPredicates = [UseRealTrue16Insts] in
     defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
-  let OtherPredicates = [UseFakeTrue16Insts] in
-    defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+  let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F16_F32_fake16
+      : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL,
+                 any_fpround>;
 } // End FPDPRounding = 1, isReMaterializable = 0
 let OtherPredicates = [NotHasTrue16BitInsts] in
   defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
 let OtherPredicates = [UseRealTrue16Insts] in
   defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
-let OtherPredicates = [UseFakeTrue16Insts] in
-  defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16
+    : VOP1Inst<"v_cvt_f32_f16_fake16", VOP_F32_F16_Fake16_OP_SEL, any_fpextend>;
 
 let SubtargetPredicate = HasBF16ConversionInsts in
 defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 49383135ab0c5..66c8d11bfcc9e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 828eb5d3fb40a..c7715eec76d37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index 03cb907f82a16..2c7eb23dab364 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -149,7 +149,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -196,7 +196,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -301,7 +301,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -350,7 +350,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -407,7 +407,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 521a0e8a2a796..489a6b360124f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -99,7 +99,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -146,7 +146,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -201,7 +201,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -300,7 +300,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -357,7 +357,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 3888ce87b46fd..72e8fc52917a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -101,7 +101,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_SITOFP %0
@@ -150,7 +150,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s16) = G_SITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
index 35d622dc57d18..1e3c4c8e596b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
@@ -115,7 +115,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_UITOFP %0
@@ -164,7 +164,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32...
[truncated]

shiltian · 2025-05-09T04:49:00Z

llvm/lib/Target/AMDGPU/VOP1Instructions.td

interesting, since when clang-format can format tablegen file?

rampitec · 2025-05-09T16:50:41Z

llvm/lib/Target/AMDGPU/VOP1Instructions.td

    defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
  let OtherPredicates = [UseFakeTrue16Insts] in
-    defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+    defm V_CVT_F16_F32_fake16 : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL, any_fpround>;


Can it use VOP1Inst_t16 instead? It will define both _t16 and _fake16.

Probably not, since we only want to use VOP_F16_F32_Fake16_OP_SEL for _fake16.

There is also VOP1Inst_t16_with_profiles...

The comment above this might explain the reason:

// V_CVT_F16_F32 and V_CVT_F32_F16 are special cases because they are // present in targets without Has16BitInsts. Otherwise they could use // class VOP1Inst_t16

shiltian · 2025-05-09T18:11:49Z

We might be able to work around this. Close this for now.

shiltian mentioned this pull request May 9, 2025

[NFC][AMDGPU] Auto generate check lines for schedule-regpressure-ilp-metric-spills.mir #139184

Closed

shiltian requested review from Sisyph, arsenm, broxigarchen, jayfoad, rampitec and vikramRH May 9, 2025 00:51

shiltian added the backend:AMDGPU label May 9, 2025

shiltian force-pushed the users/shiltian/opsel-for-cvt-between-f32-and-f16 branch from 810682a to b5f6c0b Compare May 9, 2025 04:33

shiltian changed the base branch from users/shiltian/auto-generate-check-lines-for-schedule-regpressure-ilp-metric-spills to main May 9, 2025 04:33

shiltian marked this pull request as ready for review May 9, 2025 04:33

llvmbot added llvm:mc Machine (object) code llvm:globalisel labels May 9, 2025

shiltian force-pushed the users/shiltian/opsel-for-cvt-between-f32-and-f16 branch from b5f6c0b to 6bb5d67 Compare May 9, 2025 04:46

shiltian commented May 9, 2025

View reviewed changes

llvm/lib/Target/AMDGPU/VOP1Instructions.td Outdated

Copy link

Contributor Author

shiltian May 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting, since when clang-format can format tablegen file?

shiltian force-pushed the users/shiltian/opsel-for-cvt-between-f32-and-f16 branch from 6bb5d67 to c5b6269 Compare May 9, 2025 04:49

[AMDGPU][Fake16] Support OPSEL for v_cvt_f16_f32 and v_cvt_f32_f16

88d9bc8

shiltian force-pushed the users/shiltian/opsel-for-cvt-between-f32-and-f16 branch from c5b6269 to 88d9bc8 Compare May 9, 2025 16:15

rampitec reviewed May 9, 2025

View reviewed changes

shiltian closed this May 9, 2025

shiltian deleted the users/shiltian/opsel-for-cvt-between-f32-and-f16 branch May 9, 2025 18:39

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` #139185

[AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` #139185

Uh oh!

shiltian commented May 9, 2025 •

edited

Loading

Uh oh!

shiltian commented May 9, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 9, 2025 •

edited

Loading

Uh oh!

shiltian May 9, 2025

Uh oh!

rampitec May 9, 2025

Uh oh!

shiltian May 9, 2025

Uh oh!

rampitec May 9, 2025

Uh oh!

shiltian May 9, 2025

Uh oh!

shiltian commented May 9, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[AMDGPU][Fake16] Support OPSEL for v_cvt_f16_f32 and v_cvt_f32_f16 #139185

[AMDGPU][Fake16] Support OPSEL for v_cvt_f16_f32 and v_cvt_f32_f16 #139185

Uh oh!

Conversation

shiltian commented May 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

shiltian commented May 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

shiltian May 9, 2025

Choose a reason for hiding this comment

Uh oh!

rampitec May 9, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian May 9, 2025

Choose a reason for hiding this comment

Uh oh!

rampitec May 9, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian May 9, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian commented May 9, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` #139185

[AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` #139185

shiltian commented May 9, 2025 •

edited

Loading

shiltian commented May 9, 2025 •

edited

Loading

llvmbot commented May 9, 2025 •

edited

Loading