
Conversation

tyb0807 (Contributor) commented Nov 17, 2025

  • Support serialization of the number of allocated preload kernarg SGPRs
  • Support serialization of the first preload kernarg SGPR allocated

Together they enable correctly reconstructing MIR with preload kernarg SGPRs.
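
For illustration, here is an abridged machineFunctionInfo block showing how the new fields appear in serialized MIR (register names and count taken from the kernarg_preload_single_arg case in the added test; all other fields omitted):

  machineFunctionInfo:
    argumentInfo:
      kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
      firstKernArgPreloadReg: { reg: '$sgpr8' }
    numKernargPreloadSGPRs: 1

The new test round-trips this: it stops after amdgpu-isel to emit the MIR, restarts codegen from that MIR, and checks the .amdhsa_user_sgpr_kernarg_preload_length/offset directives in the final assembly.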

llvmbot (Member) commented Nov 17, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (tyb0807)

Changes
  • Support serialization of the number of allocated preload kernarg SGPRs
  • Support serialization of the first preload kernarg SGPR allocated

Together they enable correctly reconstructing MIR with preload kernarg SGPRs.


Full diff: https://github.com/llvm/llvm-project/pull/168374.diff

10 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+29)
  • (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+18-2)
  • (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+5)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll (+2)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll (+1)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll (+1)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll (+1)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir (+4)
  • (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll (+4)
  • (added) llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll (+108)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5ff16e29bbbb1..354113dfb9fdb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2027,6 +2027,35 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
                              MFI->ArgInfo.WorkItemIDZ, 0, 0)))
     return true;
 
+  // Parse FirstKernArgPreloadReg separately, since it's a Register,
+  // not ArgDescriptor.
+  if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) {
+    const auto &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg;
+
+    if (!A.IsRegister) {
+      const MemoryBuffer &Buffer =
+        *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
+      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 0,
+                           SourceMgr::DK_Error,
+                           "firstKernArgPreloadReg must be a register", "", {},
+                           {});
+      return true;
+    }
+
+    Register Reg;
+    if (parseNamedRegisterReference(PFS, Reg, A.RegisterName.Value, Error)) {
+      SourceRange = A.RegisterName.SourceRange;
+      return true;
+    }
+
+    if (!AMDGPU::SGPR_32RegClass.contains(Reg))
+      return diagnoseRegisterClass(A.RegisterName);
+
+    MFI->ArgInfo.FirstKernArgPreloadReg = Reg;
+
+    MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs;
+  }
+
   if (ST.hasIEEEMode())
     MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
   if (ST.hasDX10ClampMode())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b398db4f7caff..55a01ce97508e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -696,7 +696,6 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
     return true;
   };
 
-  // TODO: Need to serialize kernarg preloads.
   bool Any = false;
   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
@@ -718,6 +717,20 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
 
+  // Write FirstKernArgPreloadReg separately, since it's a Register,
+  // not ArgDescriptor.
+  if (ArgInfo.FirstKernArgPreloadReg) {
+    Register Reg = ArgInfo.FirstKernArgPreloadReg;
+    if (Reg.isPhysical()) {
+      yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
+      raw_string_ostream OS(SA.RegisterName.Value);
+      OS << printReg(Reg, &TRI);
+
+      AI.FirstKernArgPreloadReg = SA;
+      Any = true;
+    }
+  }
+
   if (Any)
     return AI;
 
@@ -750,7 +763,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()),
       IsWholeWaveFunction(MFI.isWholeWaveFunction()),
       DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()),
-      ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) {
+      ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()),
+              NumKernargPreloadSGPRs(MFI.getNumKernargPreloadedSGPRs()) {
   for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
     SpillPhysVGPRS.push_back(regToString(Reg, TRI));
 
@@ -799,6 +813,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   ReturnsVoid = YamlMFI.ReturnsVoid;
   IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction;
 
+  UserSGPRInfo.allocKernargPreloadSGPRs(YamlMFI.NumKernargPreloadSGPRs);
+
   if (YamlMFI.ScavengeFI) {
     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
     if (!FIOrErr) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ca3c35067a923..d901f4c216551 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -170,6 +170,7 @@ struct SIArgumentInfo {
   std::optional<SIArgument> DispatchID;
   std::optional<SIArgument> FlatScratchInit;
   std::optional<SIArgument> PrivateSegmentSize;
+  std::optional<SIArgument> FirstKernArgPreloadReg;
 
   std::optional<SIArgument> WorkGroupIDX;
   std::optional<SIArgument> WorkGroupIDY;
@@ -195,6 +196,7 @@ template <> struct MappingTraits<SIArgumentInfo> {
     YamlIO.mapOptional("dispatchID", AI.DispatchID);
     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+    YamlIO.mapOptional("firstKernArgPreloadReg", AI.FirstKernArgPreloadReg);
 
     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
@@ -305,6 +307,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
   unsigned DynamicVGPRBlockSize = 0;
   unsigned ScratchReservedForDynamicVGPRs = 0;
 
+  unsigned NumKernargPreloadSGPRs = 0;
+
   SIMachineFunctionInfo() = default;
   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                         const TargetRegisterInfo &TRI,
@@ -361,6 +365,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
     YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, false);
     YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
                        MFI.ScratchReservedForDynamicVGPRs, 0);
+    YamlIO.mapOptional("numKernargPreloadSGPRs", MFI.NumKernargPreloadSGPRs, 0);
     YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false);
   }
 };
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index ed8bc9ca700a8..d1fae8ae92a2a 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT:   hasInitWholeWave: false
 ; CHECK-NEXT:   dynamicVGPRBlockSize: 0
 ; CHECK-NEXT:   scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT:   numKernargPreloadSGPRs: 0
 ; CHECK-NEXT:   isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 {
@@ -320,6 +321,7 @@
 ; CHECK-NEXT:   hasInitWholeWave: false
 ; CHECK-NEXT:   dynamicVGPRBlockSize: 0
 ; CHECK-NEXT:   scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT:   numKernargPreloadSGPRs: 0
 ; CHECK-NEXT:   isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 68c3d1b2f2972..3e4eaf0a3cd98 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -48,6 +48,7 @@
 ; AFTER-PEI-NEXT: hasInitWholeWave: false
 ; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0
 ; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0
+; AFTER-PEI-NEXT: numKernargPreloadSGPRs: 0
 ; AFTER-PEI-NEXT: isWholeWaveFunction: false
 ; AFTER-PEI-NEXT: body:
 define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 55598ec70d953..2d820102e8706 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 2326b2dc09b58..c949a3d94c6a3 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 0cb9bc095bc50..87c3eb626ef0d 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -57,6 +57,7 @@
 # FULL-NEXT:  hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -167,6 +168,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -248,6 +250,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -330,6 +333,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index ab4383b675243..ab3c0335f8ea9 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -58,6 +58,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
@@ -110,6 +111,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
@@ -186,6 +188,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define void @function() {
@@ -244,6 +247,7 @@ define void @function() {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define void @function_nsz() #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
new file mode 100644
index 0000000000000..de9a268805995
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
@@ -0,0 +1,108 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel %s -o - | FileCheck --check-prefix=MIR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel -o %t.mir %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %t.mir -o - | FileCheck --check-prefix=ASM %s
+
+; Test that kernarg preloading information is correctly serialized to MIR and
+; can be round-tripped through MIR serialization/deserialization.
+
+; MIR-LABEL: name: kernarg_preload_single_arg
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 1
+
+; ASM-LABEL: kernarg_preload_single_arg:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_single_arg(i32 inreg %arg0) {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_multiple_args_unaligned
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 5
+
+; ASM-LABEL: kernarg_preload_multiple_args_unaligned:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 5
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_multiple_args_unaligned(i32 inreg %arg0, i64 inreg %arg1, i32 inreg %arg2) {
+entry:
+  %val = add i32 %arg0, %arg2
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_multiple_args_aligned
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 4
+
+; ASM-LABEL: kernarg_preload_multiple_args_aligned:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 4
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_multiple_args_aligned(i64 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2) {
+entry:
+  %val = add i32 %arg1, %arg2
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_with_ptr
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_with_ptr:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_with_ptr(ptr inreg %ptr) {
+entry:
+  %val = load i32, ptr %ptr
+  %add = add i32 %val, 1
+  store i32 %add, ptr %ptr
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_no_preload
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR-NOT: firstKernArgPreloadReg
+; MIR: numKernargPreloadSGPRs: 0
+
+; ASM-LABEL: kernarg_no_preload:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 0
+define amdgpu_kernel void @kernarg_no_preload(i32 %arg0) {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_mixed
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_mixed:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+define amdgpu_kernel void @kernarg_preload_mixed(i32 inreg %arg0, i32 inreg %arg1, i32 %arg2) {
+entry:
+  %val = add i32 %arg0, %arg1
+  %val2 = add i32 %val, %arg2
+  store i32 %val2, ptr addrspace(1) null
+  ret void
+}

github-actions bot commented Nov 17, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

tyb0807 requested review from kerbowa and qcolombet November 19, 2025 23:33
- Support serialization of the number of allocated preload kernarg SGPRs
- Support serialization of the first preload kernarg SGPR allocated

Together they enable correctly reconstructing MIR with preload kernarg
SGPRs.
github-actions bot commented Nov 20, 2025

🐧 Linux x64 Test Results

  • 186460 tests passed
  • 4872 tests skipped

qcolombet (Collaborator) left a comment

I'll let @kerbowa give the final approval, but this looks good to me.
I've added a couple of comments.

if (Reg.isPhysical()) {
yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
raw_string_ostream OS(SA.RegisterName.Value);
OS << printReg(Reg, &TRI);
Collaborator:
Reuse convertArg here instead of inlining the logic.

tyb0807 (Contributor, Author):
I don't think we can because ArgInfo.FirstKernArgPreloadReg is a Register and not ArgDescriptor
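
A minimal sketch of that distinction, with types simplified and the helper signature approximated rather than copied from the LLVM sources (illustrative only):

  // Simplified stand-ins -- not the actual LLVM declarations.
  #include <optional>
  #include <string>

  struct Register { unsigned Id = 0; };
  struct ArgDescriptor { bool IsRegister; Register Reg; unsigned StackOffset; };
  namespace yaml { struct SIArgument { std::string RegisterName; }; }

  // ArgDescriptor-shaped fields go through a shared helper along these lines...
  bool convertArg(std::optional<yaml::SIArgument> &A, const ArgDescriptor &Arg);

  // ...whereas FirstKernArgPreloadReg is a bare Register with no ArgDescriptor
  // wrapper, hence the separate handling in the hunk quoted above.
  Register FirstKernArgPreloadReg;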

tyb0807 requested review from arsenm and qcolombet November 20, 2025 23:36
tyb0807 merged commit 29d1e18 into llvm:main Nov 22, 2025
10 checks passed
tyb0807 deleted the preload_kernarg branch November 22, 2025 22:03
aadeshps-mcw pushed a commit to aadeshps-mcw/llvm-project that referenced this pull request Nov 26, 2025
…lvm#168374)

- Support serialization of the number of allocated preload kernarg SGPRs
- Support serialization of the first preload kernarg SGPR allocated

Together they enable correctly reconstructing MIR with preload kernarg
SGPRs.
Priyanshu3820 pushed a commit to Priyanshu3820/llvm-project that referenced this pull request Nov 26, 2025
…lvm#168374)

- Support serialization of the number of allocated preload kernarg SGPRs
- Support serialization of the first preload kernarg SGPR allocated

Together they enable correctly reconstructing MIR with preload kernarg
SGPRs.