diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117664983..201235908c55d 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -62,6 +62,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" +#include "SIMachineFunctionInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" @@ -2507,6 +2508,14 @@ SILoadStoreOptimizer::collectMergeableInsts( LLVM_DEBUG(dbgs() << "Skip tbuffer with unknown format: " << MI); continue; } + + const MachineFunction *MF = MI.getParent()->getParent(); + const auto *MFI = MF->getInfo<SIMachineFunctionInfo>(); + if (!MFI->isRelaxedTBufferOOBMode()) { + LLVM_DEBUG( + dbgs() << "Skip tbuffer combine: relaxed mode not enabled\n"); + continue; + } } CombineInfo CI; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 1f11be475e9f8..281961e82610d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -197,6 +197,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, } ClusterDims = AMDGPU::ClusterDimsAttr::get(F); + + // Enable relaxed TBUFFER OOB mode if amdgpu.oob.mode has bit 0x2 set. 
+ if (const auto *CI = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag("amdgpu.oob.mode")); + CI && (CI->getZExtValue() & 0x2)) + setRelaxedTBufferOOBMode(true); } MachineFunctionInfo *SIMachineFunctionInfo::clone( @@ -744,6 +750,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()), Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()), IsWholeWaveFunction(MFI.isWholeWaveFunction()), + RelaxedTBufferOOBMode(MFI.isRelaxedTBufferOOBMode()), DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()), ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) { for (Register Reg : MFI.getSGPRSpillPhysVGPRs()) @@ -793,6 +800,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( BytesInStackArgArea = YamlMFI.BytesInStackArgArea; ReturnsVoid = YamlMFI.ReturnsVoid; IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction; + RelaxedTBufferOOBMode = YamlMFI.RelaxedTBufferOOBMode; if (YamlMFI.ScavengeFI) { auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 45606153db58e..66b7d2e35c9f6 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -301,6 +301,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { bool HasInitWholeWave = false; bool IsWholeWaveFunction = false; + bool RelaxedTBufferOOBMode = false; unsigned DynamicVGPRBlockSize = 0; unsigned ScratchReservedForDynamicVGPRs = 0; @@ -362,6 +363,8 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { YamlIO.mapOptional("scratchReservedForDynamicVGPRs", MFI.ScratchReservedForDynamicVGPRs, 0); YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false); + YamlIO.mapOptional("RelaxedTBufferOOBMode", MFI.RelaxedTBufferOOBMode, + false); } }; @@ -528,6 +531,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // scheduler stage. 
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit; + // Enable relaxed TBUFFER out-of-bounds mode. Default is false. + bool RelaxedTBufferOOBMode = false; + MCPhysReg getNextUserSGPR() const; MCPhysReg getNextSystemSGPR() const; @@ -1212,6 +1218,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; } AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; } + + bool isRelaxedTBufferOOBMode() const { return RelaxedTBufferOOBMode; } + void setRelaxedTBufferOOBMode(bool Enabled) { + RelaxedTBufferOOBMode = Enabled; + } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir index 402c00298c8da..7124d4d0f680b 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir @@ -3,6 +3,8 @@ --- name: gfx10_tbuffer_load_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz @@ -25,6 +27,8 @@ body: | --- name: gfx10_tbuffer_load_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x @@ -47,6 +51,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy @@ -69,6 +75,8 @@ body: | --- name: gfx10_tbuffer_load_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy @@ -91,6 +99,8 @@ body: | --- name: gfx10_tbuffer_load_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_x @@ -113,6 +123,8 @@ body: | --- name: gfx10_tbuffer_load_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x @@ -135,6 
+147,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32 @@ -157,6 +171,8 @@ body: | --- name: gfx10_tbuffer_load_float_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_float_32 @@ -198,6 +214,8 @@ body: | --- name: gfx10_tbuffer_load_sint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_sint_32 @@ -239,6 +257,8 @@ body: | --- name: gfx10_tbuffer_load_uint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_uint_32 @@ -280,6 +300,8 @@ body: | --- name: gfx10_tbuffer_load_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch @@ -315,6 +337,8 @@ body: | --- name: gfx10_tbuffer_load_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_num_format_mismatch @@ -350,6 +374,8 @@ body: | --- name: gfx10_tbuffer_store_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -384,6 +410,8 @@ body: | --- name: gfx10_tbuffer_store_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -418,6 +446,8 @@ body: | --- name: gfx10_tbuffer_store_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -454,6 +484,8 @@ body: | --- name: gfx10_tbuffer_store_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: 
$sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -488,6 +520,8 @@ body: | --- name: gfx10_tbuffer_store_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -523,6 +557,8 @@ body: | --- name: gfx10_tbuffer_store_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -555,6 +591,8 @@ body: | --- name: gfx10_tbuffer_store_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -587,6 +625,8 @@ body: | --- name: gfx10_tbuffer_store_float32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -643,6 +683,8 @@ body: | --- name: gfx10_tbuffer_store_sint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -699,6 +741,8 @@ body: | --- name: gfx10_tbuffer_store_uint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -755,6 +799,8 @@ body: | --- name: gfx10_tbuffer_store_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -811,6 +857,8 @@ body: | --- name: gfx10_tbuffer_store_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ 
-867,6 +915,8 @@ body: | --- name: gfx10_tbuffer_load_not_merged_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0 @@ -888,6 +938,8 @@ body: | --- name: gfx10_tbuffer_load_not_merged_swizzled_1 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1 @@ -909,6 +961,8 @@ body: | --- name: gfx10_tbuffer_load_merge_across_swizzle +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle @@ -933,6 +987,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_idxen @@ -957,6 +1013,8 @@ body: | --- name: gfx10_tbuffer_load_x_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_idxen @@ -981,6 +1039,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen @@ -1005,6 +1065,8 @@ body: | --- name: gfx10_tbuffer_load_x_xyz_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_idxen @@ -1029,6 +1091,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_idxen_exact @@ -1056,6 +1120,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_x_idxen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_idxen_exact_swizzled_0 @@ -1082,6 +1148,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: 
name: gfx10_tbuffer_load_x_x_bothen @@ -1106,6 +1174,8 @@ body: | --- name: gfx10_tbuffer_load_x_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_bothen @@ -1130,6 +1200,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen @@ -1154,6 +1226,8 @@ body: | --- name: gfx10_tbuffer_load_x_xyz_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_bothen @@ -1178,6 +1252,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_idxen_exact @@ -1202,6 +1278,8 @@ body: | --- name: gfx10_tbuffer_load_x_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_idxen_exact @@ -1226,6 +1304,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact @@ -1250,6 +1330,8 @@ body: | --- name: gfx10_tbuffer_load_x_xyz_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_idxen_exact @@ -1274,6 +1356,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_bothen_exact @@ -1298,6 +1382,8 @@ body: | --- name: gfx10_tbuffer_load_x_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_bothen_exact @@ -1322,6 +1408,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | 
bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact @@ -1346,6 +1434,8 @@ body: | --- name: gfx10_tbuffer_load_x_xyz_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_bothen_exact @@ -1370,6 +1460,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_bothen_exact @@ -1397,6 +1489,8 @@ body: | --- name: gfx10_tbuffer_load_x_x_x_bothen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_bothen_exact_swizzled_0 @@ -1423,6 +1517,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_vaddr @@ -1448,6 +1544,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_srsrc @@ -1475,6 +1573,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_vaddr @@ -1500,6 +1600,8 @@ body: | --- name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_srsrc diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir index f5407a5223166..9f234056c83ec 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir @@ -3,6 +3,8 @@ --- name: gfx11_tbuffer_load_x_xyz +machineFunctionInfo: + 
RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz @@ -25,6 +27,8 @@ body: | --- name: gfx11_tbuffer_load_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xyz_x @@ -47,6 +51,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy @@ -69,6 +75,8 @@ body: | --- name: gfx11_tbuffer_load_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy @@ -91,6 +99,8 @@ body: | --- name: gfx11_tbuffer_load_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_x @@ -113,6 +123,8 @@ body: | --- name: gfx11_tbuffer_load_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x @@ -135,6 +147,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_format_32_32_32_32 @@ -157,6 +171,8 @@ body: | --- name: gfx11_tbuffer_load_float_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_float_32 @@ -198,6 +214,8 @@ body: | --- name: gfx11_tbuffer_load_sint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_sint_32 @@ -239,6 +257,8 @@ body: | --- name: gfx11_tbuffer_load_uint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_uint_32 @@ -280,6 +300,8 @@ body: | --- name: gfx11_tbuffer_load_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_data_format_mismatch @@ -315,6 
+337,8 @@ body: | --- name: gfx11_tbuffer_load_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_num_format_mismatch @@ -350,6 +374,8 @@ body: | --- name: gfx11_tbuffer_store_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -384,6 +410,8 @@ body: | --- name: gfx11_tbuffer_store_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -418,6 +446,8 @@ body: | --- name: gfx11_tbuffer_store_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -454,6 +484,8 @@ body: | --- name: gfx11_tbuffer_store_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -488,6 +520,8 @@ body: | --- name: gfx11_tbuffer_store_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -523,6 +557,8 @@ body: | --- name: gfx11_tbuffer_store_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -555,6 +591,8 @@ body: | --- name: gfx11_tbuffer_store_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -587,6 +625,8 @@ body: | --- name: gfx11_tbuffer_store_float32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -643,6 +683,8 @@ body: | --- name: 
gfx11_tbuffer_store_sint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -699,6 +741,8 @@ body: | --- name: gfx11_tbuffer_store_uint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -755,6 +799,8 @@ body: | --- name: gfx11_tbuffer_store_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -811,6 +857,8 @@ body: | --- name: gfx11_tbuffer_store_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -867,6 +915,8 @@ body: | --- name: gfx11_tbuffer_load_not_merged_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_swizzled_0 @@ -888,6 +938,8 @@ body: | --- name: gfx11_tbuffer_load_not_merged_swizzled_1 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_swizzled_1 @@ -909,6 +961,8 @@ body: | --- name: gfx11_tbuffer_load_merge_across_swizzle +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_merge_across_swizzle @@ -933,6 +987,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_idxen @@ -957,6 +1013,8 @@ body: | --- name: gfx11_tbuffer_load_x_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: 
gfx11_tbuffer_load_x_xy_idxen @@ -981,6 +1039,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen @@ -1005,6 +1065,8 @@ body: | --- name: gfx11_tbuffer_load_x_xyz_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_idxen @@ -1029,6 +1091,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_bothen @@ -1053,6 +1117,8 @@ body: | --- name: gfx11_tbuffer_load_x_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_bothen @@ -1077,6 +1143,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen @@ -1101,6 +1169,8 @@ body: | --- name: gfx11_tbuffer_load_x_xyz_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_bothen @@ -1125,6 +1195,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_idxen_exact @@ -1149,6 +1221,8 @@ body: | --- name: gfx11_tbuffer_load_x_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_idxen_exact @@ -1173,6 +1247,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact @@ -1197,6 +1273,8 @@ body: | --- name: gfx11_tbuffer_load_x_xyz_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: 
gfx11_tbuffer_load_x_xyz_idxen_exact @@ -1221,6 +1299,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_idxen_exact @@ -1248,6 +1328,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_idxen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_idxen_exact_swizzled_0 @@ -1274,6 +1356,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_bothen_exact @@ -1298,6 +1382,8 @@ body: | --- name: gfx11_tbuffer_load_x_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_bothen_exact @@ -1322,6 +1408,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact @@ -1346,6 +1434,8 @@ body: | --- name: gfx11_tbuffer_load_x_xyz_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_bothen_exact @@ -1370,6 +1460,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_vaddr @@ -1396,6 +1488,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_srsrc @@ -1423,6 +1517,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_vaddr @@ 
-1448,6 +1544,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_srsrc @@ -1475,6 +1573,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_bothen_exact @@ -1502,6 +1602,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_bothen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_bothen_exact_swizzled_0 @@ -1528,6 +1630,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_idxen_16bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0,$sgpr1,$sgpr2,$sgpr3,$vgpr0 @@ -1550,6 +1654,8 @@ body: | --- name: gfx11_tbuffer_load_idxen_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1598,6 +1704,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_uint_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0 @@ -1617,6 +1725,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_sint_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0 @@ -1636,6 +1746,8 @@ body: | --- name: gfx11_tbuffer_load_x_off2_off4_16bit_no_merge +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1664,6 +1776,8 @@ body: | --- name: gfx11_tbuffer_store_x_x_x_idxen_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 @@ -1696,6 +1810,8 @@ body: | --- name: gfx11_tbuffer_store_idxen_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: 
true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -1752,6 +1868,8 @@ body: | --- name: gfx11_tbuffer_store_xy_xy_uint_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1772,6 +1890,8 @@ body: | --- name: gfx11_tbuffer_store_xy_xy_sint_16_bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1792,6 +1912,8 @@ body: | --- name: gfx11_tbuffer_load_x_x_x_idxen_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1824,6 +1946,8 @@ body: | --- name: gfx11_tbuffer_load_idxen_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1872,6 +1996,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_uint_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1901,6 +2027,8 @@ body: | --- name: gfx11_tbuffer_load_xy_xy_idxen_sint_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1930,6 +2058,8 @@ body: | --- name: gfx11_tbuffer_load_x_off3_off4_8bit_no_merge +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 @@ -1958,6 +2088,8 @@ body: | --- name: gfx11_tbuffer_store_x_x_x_idxen_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 @@ -1990,6 +2122,8 @@ body: | --- name: gfx11_tbuffer_store_idxen_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -2046,6 +2180,8 @@ body: | --- name: gfx11_tbuffer_store_xy_xy_idxen_uint_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -2082,6 +2218,8 @@ body: | --- name: gfx11_tbuffer_store_xy_xy_idxen_sint_8bit +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -2118,6 +2256,8 @@ body: | --- name: gfx11_tbuffer_store_x_off3_off4_8bit_no_merge +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir index 1ee4f9e008197..fb223cf5ed7b8 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir @@ -3,6 +3,8 @@ --- name: gfx12_tbuffer_load_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz @@ -25,6 +27,8 @@ body: | --- name: gfx12_tbuffer_load_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xyz_x @@ -47,6 +51,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy @@ -69,6 +75,8 @@ body: | --- name: gfx12_tbuffer_load_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy @@ -91,6 +99,8 @@ body: | --- name: gfx12_tbuffer_load_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_x @@ -113,6 +123,8 @@ body: | --- name: gfx12_tbuffer_load_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; 
GFX12-LABEL: name: gfx12_tbuffer_load_x_x @@ -135,6 +147,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_format_32_32_32_32 @@ -157,6 +171,8 @@ body: | --- name: gfx12_tbuffer_load_float_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_float_32 @@ -198,6 +214,8 @@ body: | --- name: gfx12_tbuffer_load_sint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_sint_32 @@ -239,6 +257,8 @@ body: | --- name: gfx12_tbuffer_load_uint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_uint_32 @@ -280,6 +300,8 @@ body: | --- name: gfx12_tbuffer_load_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_data_format_mismatch @@ -315,6 +337,8 @@ body: | --- name: gfx12_tbuffer_load_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_num_format_mismatch @@ -350,6 +374,8 @@ body: | --- name: gfx12_tbuffer_store_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -384,6 +410,8 @@ body: | --- name: gfx12_tbuffer_store_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -418,6 +446,8 @@ body: | --- name: gfx12_tbuffer_store_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -454,6 +484,8 @@ body: | --- name: gfx12_tbuffer_store_x_xy +machineFunctionInfo: + 
RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -488,6 +520,8 @@ body: | --- name: gfx12_tbuffer_store_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -523,6 +557,8 @@ body: | --- name: gfx12_tbuffer_store_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -555,6 +591,8 @@ body: | --- name: gfx12_tbuffer_store_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -587,6 +625,8 @@ body: | --- name: gfx12_tbuffer_store_float32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -643,6 +683,8 @@ body: | --- name: gfx12_tbuffer_store_sint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -699,6 +741,8 @@ body: | --- name: gfx12_tbuffer_store_uint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -755,6 +799,8 @@ body: | --- name: gfx12_tbuffer_store_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -811,6 +857,8 @@ body: | --- name: gfx12_tbuffer_store_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, 
$vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -867,6 +915,8 @@ body: | --- name: gfx12_tbuffer_load_not_merged_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_swizzled_0 @@ -888,6 +938,8 @@ body: | --- name: gfx12_tbuffer_load_not_merged_swizzled_1 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_swizzled_1 @@ -909,6 +961,8 @@ body: | --- name: gfx12_tbuffer_load_merge_across_swizzle +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_merge_across_swizzle @@ -933,6 +987,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_idxen @@ -957,6 +1013,8 @@ body: | --- name: gfx12_tbuffer_load_x_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_idxen @@ -981,6 +1039,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen @@ -1005,6 +1065,8 @@ body: | --- name: gfx12_tbuffer_load_x_xyz_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_idxen @@ -1029,6 +1091,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_bothen @@ -1053,6 +1117,8 @@ body: | --- name: gfx12_tbuffer_load_x_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_bothen @@ -1077,6 +1143,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; 
GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen @@ -1101,6 +1169,8 @@ body: | --- name: gfx12_tbuffer_load_x_xyz_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_bothen @@ -1125,6 +1195,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_idxen_exact @@ -1149,6 +1221,8 @@ body: | --- name: gfx12_tbuffer_load_x_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_idxen_exact @@ -1173,6 +1247,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact @@ -1197,6 +1273,8 @@ body: | --- name: gfx12_tbuffer_load_x_xyz_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_idxen_exact @@ -1221,6 +1299,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_idxen_exact @@ -1248,6 +1328,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_x_idxen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_idxen_exact_swizzled_0 @@ -1274,6 +1356,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_bothen_exact @@ -1298,6 +1382,8 @@ body: | --- name: gfx12_tbuffer_load_x_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_bothen_exact @@ -1322,6 +1408,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_bothen_exact 
+machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: @@ -1347,6 +1435,8 @@ body: | --- name: gfx12_tbuffer_load_x_xyz_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: @@ -1372,6 +1462,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_vaddr @@ -1398,6 +1490,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_srsrc @@ -1425,6 +1519,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_vaddr @@ -1450,6 +1546,8 @@ body: | --- name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_srsrc @@ -1477,6 +1575,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_bothen_exact @@ -1504,6 +1604,8 @@ body: | --- name: gfx12_tbuffer_load_x_x_x_bothen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_bothen_exact_swizzled_0 diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir index 3a43e743de493..fe073fbfd5163 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir @@ -3,6 +3,8 @@ --- name: gfx9_tbuffer_load_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: 
gfx9_tbuffer_load_x_xyz @@ -25,6 +27,8 @@ body: | --- name: gfx9_tbuffer_load_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x @@ -47,6 +51,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy @@ -69,6 +75,8 @@ body: | --- name: gfx9_tbuffer_load_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy @@ -91,6 +99,8 @@ body: | --- name: gfx9_tbuffer_load_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_x @@ -114,6 +124,8 @@ body: | name: gfx9_tbuffer_load_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x @@ -136,6 +148,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32 @@ -158,6 +172,8 @@ body: | --- name: gfx9_tbuffer_load_float_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_float_32 @@ -199,6 +215,8 @@ body: | --- name: gfx9_tbuffer_load_sint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_sint_32 @@ -240,6 +258,8 @@ body: | --- name: gfx9_tbuffer_load_uint_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_uint_32 @@ -281,6 +301,8 @@ body: | --- name: gfx9_tbuffer_load_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch @@ -316,6 +338,8 @@ body: | --- name: gfx9_tbuffer_load_not_merged_num_format_mismatch +machineFunctionInfo: + 
RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch @@ -351,6 +375,8 @@ body: | --- name: gfx9_tbuffer_store_x_xyz +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -385,6 +411,8 @@ body: | --- name: gfx9_tbuffer_store_xyz_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -419,6 +447,8 @@ body: | --- name: gfx9_tbuffer_store_xy_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -455,6 +485,8 @@ body: | --- name: gfx9_tbuffer_store_x_xy +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -489,6 +521,8 @@ body: | --- name: gfx9_tbuffer_store_xy_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -524,6 +558,8 @@ body: | --- name: gfx9_tbuffer_store_x_x +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -556,6 +592,8 @@ body: | --- name: gfx9_tbuffer_store_x_x_format_32_32_32_32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -588,6 +626,8 @@ body: | --- name: gfx9_tbuffer_store_float32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -644,6 +684,8 @@ body: | --- name: gfx9_tbuffer_store_sint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, 
$sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -700,6 +742,8 @@ body: | --- name: gfx9_tbuffer_store_uint32 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -756,6 +800,8 @@ body: | --- name: gfx9_tbuffer_store_not_merged_data_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -812,6 +858,8 @@ body: | --- name: gfx9_tbuffer_store_not_merged_num_format_mismatch +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -868,6 +916,8 @@ body: | --- name: gfx9_tbuffer_load_not_merged_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0 @@ -889,6 +939,8 @@ body: | --- name: gfx9_tbuffer_load_not_merged_swizzled_1 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1 @@ -910,6 +962,8 @@ body: | --- name: gfx9_tbuffer_load_merge_across_swizzle +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle @@ -934,6 +988,8 @@ body: | --- name: gfx9_tbuffer_load_merge_across_swizzled_store +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzled_store @@ -959,6 +1015,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_idxen @@ -983,6 +1041,8 @@ body: | --- name: gfx9_tbuffer_load_x_xy_idxen +machineFunctionInfo: 
+ RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_idxen @@ -1007,6 +1067,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen @@ -1031,6 +1093,8 @@ body: | --- name: gfx9_tbuffer_load_x_xyz_idxen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_idxen @@ -1055,6 +1119,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_bothen @@ -1079,6 +1145,8 @@ body: | --- name: gfx9_tbuffer_load_x_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_bothen @@ -1103,6 +1171,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen @@ -1127,6 +1197,8 @@ body: | --- name: gfx9_tbuffer_load_x_xyz_bothen +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_bothen @@ -1151,6 +1223,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_idxen_exact @@ -1175,6 +1249,8 @@ body: | --- name: gfx9_tbuffer_load_x_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_idxen_exact @@ -1199,6 +1275,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact @@ -1223,6 +1301,8 @@ body: | --- name: gfx9_tbuffer_load_x_xyz_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | 
bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_idxen_exact @@ -1247,6 +1327,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_x_idxen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_idxen_exact @@ -1274,6 +1356,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_x_idxen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_idxen_exact_swizzled_0 @@ -1300,6 +1384,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_bothen_exact @@ -1324,6 +1410,8 @@ body: | --- name: gfx9_tbuffer_load_x_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_bothen_exact @@ -1348,6 +1436,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact @@ -1372,6 +1462,8 @@ body: | --- name: gfx9_tbuffer_load_x_xyz_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_bothen_exact @@ -1396,6 +1488,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_x_bothen_exact +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_bothen_exact @@ -1423,6 +1517,8 @@ body: | --- name: gfx9_tbuffer_load_x_x_x_bothen_exact_swizzled_0 +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_bothen_exact_swizzled_0 @@ -1449,6 +1545,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_vaddr @@ -1474,6 +1572,8 @@ 
body: | --- name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_srsrc @@ -1501,6 +1601,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_vaddr +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_vaddr @@ -1526,6 +1628,8 @@ body: | --- name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_srsrc +machineFunctionInfo: + RelaxedTBufferOOBMode: true body: | bb.0.entry: ; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_srsrc diff --git a/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir new file mode 100644 index 0000000000000..84171b1d99204 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=si-load-store-opt -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target triple = "amdgcn" + + define float @relaxed-tbuffer-oob-mode(<4 x i32> %vec, i32 %index) #0 { + %1 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 0, i32 0, i32 22, i32 0) + %2 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 4, i32 0, i32 22, i32 0) + %3 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 8, i32 0, i32 22, i32 0) + %4 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 12, i32 0, i32 22, i32 0) + %5 = bitcast i32 %1 to float + %6 = bitcast i32 %2 to float + %7 = bitcast i32 %3 to float + %8 = bitcast i32 %4 to float + %add = fadd float %5, %6 + %mul = fmul float %7, %8 + %res = fadd float %add, %mul + ret float %res + } + + !llvm.module.flags = !{!0} + !0 = !{i32 4, 
!"amdgpu.oob.mode", i32 2} +... +--- +name: relaxed-tbuffer-oob-mode +machineFunctionInfo: + RelaxedTBufferOOBMode: true +body: | + bb.0 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: relaxed-tbuffer-oob-mode + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1_sub2 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[COPY5]].sub0_sub1 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY5]].sub2 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]].sub1 + ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY9]], 0, killed [[COPY10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed [[COPY8]], 0, killed [[COPY6]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, killed [[V_MUL_F32_e64_]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_1]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %12:vgpr_32 = COPY $vgpr4 + %11:vgpr_32 = COPY $vgpr3 + %10:vgpr_32 = COPY $vgpr2 + %9:vgpr_32 = COPY $vgpr1 + %8:vgpr_32 = COPY $vgpr0 + %13:sgpr_128 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3 + %14:sreg_32 = S_MOV_B32 0 + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 0, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + %16:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + %17:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + %18:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 12, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + %19:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed %15, 0, killed %16, 0, 0, implicit $mode, implicit $exec + %20:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed %17, 0, killed %18, 0, 0, implicit $mode, implicit $exec + %21:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed %19, 0, killed %20, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %21 + SI_RETURN implicit $vgpr0 +... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll index 929db4c9be1c7..cbde52ed190c8 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll @@ -49,6 +49,7 @@ ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 { entry: @@ -321,6 +322,7 @@ ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 { entry: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll index f054bea1f2780..0fc739df40fc4 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -49,6 +49,7 @@ ; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0 ; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0 ; AFTER-PEI-NEXT: isWholeWaveFunction: false +; AFTER-PEI-NEXT: RelaxedTBufferOOBMode: false ; AFTER-PEI-NEXT: body: define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 { %wide.sgpr0 = call <32 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll index 924216efcc461..1c8c8bd551934 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll +++ 
b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll @@ -49,6 +49,7 @@ ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 { bb0: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll index 39f1ddd0609d8..8727278ca8061 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll @@ -49,6 +49,7 @@ ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 { bb0: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 0cb9bc095bc50..c3bcad547ba57 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -58,6 +58,7 @@ # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false +# FULL-NEXT: RelaxedTBufferOOBMode: false # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -168,6 +169,7 @@ body: | # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false +# FULL-NEXT: RelaxedTBufferOOBMode: false # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -249,6 +251,7 @@ body: | # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 # FULL-NEXT: isWholeWaveFunction: 
false +# FULL-NEXT: RelaxedTBufferOOBMode: false # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -331,6 +334,7 @@ body: | # FULL-NEXT: dynamicVGPRBlockSize: 0 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0 # FULL-NEXT: isWholeWaveFunction: false +# FULL-NEXT: RelaxedTBufferOOBMode: false # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index ab4383b675243..1a2881d4ab74a 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -59,6 +59,7 @@ ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { %gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0 @@ -111,6 +112,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0 @@ -187,6 +189,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define void @function() { ret void @@ -245,6 +248,7 @@ define void @function() { ; CHECK-NEXT: dynamicVGPRBlockSize: 0 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0 ; CHECK-NEXT: isWholeWaveFunction: false +; CHECK-NEXT: RelaxedTBufferOOBMode: false ; CHECK-NEXT: body: define void 
@function_nsz() #0 { ret void