From 119d5fe36f3b0625afabfb13f7c302a67cc5b7e3 Mon Sep 17 00:00:00 2001 From: niconunezz Date: Tue, 30 Sep 2025 22:29:26 +0200 Subject: [PATCH 1/8] [AMDGPU] Insert copy when only one register can be constrained --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 19 +++++++--- .../load-store-opt-ds-regclass-constrain.mir | 37 +++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index f0d1117664983..21bd75c229042 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1352,12 +1352,21 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, DataRC1, SubReg); } - if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) || - !MRI->constrainRegClass(Data1->getReg(), DataRC1)) + bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0); + bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1); + if (!constrainData0 && !constrainData1) { return nullptr; - - // TODO: If one register can be constrained, and not the other, insert a - // copy. + } else if (!constrainData0 || !constrainData1) { + MachineBasicBlock::iterator InsertBefore = CI.I; + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + const MachineOperand *activeData = !constrainData0 ? Data0 : Data1; + Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg) + .addReg(activeData->getReg(), 0); + const_cast(activeData)->setReg(BaseReg); + } } return Where; diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 33f210533e10b..7a505f599254e 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -206,5 +206,42 @@ body: | %2:av_64_align2 = COPY $vgpr4_vgpr5 DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3) DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3) +... + +--- +name: ds_write_b32__av32_physical +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ds_write_b32__av32_physical + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3) + %0:vgpr_32 = COPY $vgpr0 + %1:av_32 = COPY $vgpr1 + DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3) ... + +--- +name: ds_write_b32__physical_av32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: ds_write_b32__physical_av32 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3) + %0:vgpr_32 = COPY $vgpr0 + %2:av_32 = COPY $vgpr2 + DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) \ No newline at end of file From e9f7002f13bc93de12933fdb53b6db50a1e0d73b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?= <125479151+niconunezz@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:05:33 +0200 Subject: [PATCH 2/8] Update llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir Co-authored-by: Matt Arsenault --- .../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 7a505f599254e..0cb91faf088a5 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -244,4 +244,5 @@ body: | %0:vgpr_32 = COPY $vgpr0 %2:av_32 = COPY $vgpr2 DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) - DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) \ No newline at end of file + DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) +... From 80619de370adbe44e2347cb0a4c7b4d36f4daf3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?= <125479151+niconunezz@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:10:31 +0200 Subject: [PATCH 3/8] Update llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir Co-authored-by: Matt Arsenault --- .../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 0cb91faf088a5..29d2ca7337b22 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -225,6 +225,7 @@ body: | %1:av_32 = COPY $vgpr1 DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3) DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3) +... ... From b716facf2230cc867b69a2414429525e8f5cd0ce Mon Sep 17 00:00:00 2001 From: niconunezz Date: Wed, 1 Oct 2025 18:24:33 +0200 Subject: [PATCH 4/8] address suggested changes --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 24 +++++++---- .../load-store-opt-ds-regclass-constrain.mir | 40 +++++++++++++++++++ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 21bd75c229042..4df78b9e946da 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1352,20 +1352,28 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, DataRC1, SubReg); } - bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0); - bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1); - if (!constrainData0 && !constrainData1) { + bool canBeConstrainedData0 = + MRI->constrainRegClass(Data0->getReg(), DataRC0); + bool canBeConstrainedData1 = + MRI->constrainRegClass(Data1->getReg(), DataRC1); + if (!canBeConstrainedData0 && !canBeConstrainedData1) { return nullptr; - } else if (!constrainData0 || !constrainData1) { + } + if (!canBeConstrainedData0 || !canBeConstrainedData1) { MachineBasicBlock::iterator InsertBefore = CI.I; MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); - const MachineOperand *activeData = !constrainData0 ? Data0 : Data1; - Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + const DebugLoc &DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(), + Paired.I->getDebugLoc()); + const CombineInfo &ActiveCI = canBeConstrainedData0 ? Paired : CI; + MachineOperand *activeData = + TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0); const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const TargetRegisterClass *RC = getDataRegClass(*CI.I); + Register BaseReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg) .addReg(activeData->getReg(), 0); - const_cast(activeData)->setReg(BaseReg); + + activeData->setReg(BaseReg); } } diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 29d2ca7337b22..4a142f1c1dca2 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -247,3 +247,43 @@ body: | DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) ... + + +--- +name: ds_write_b64__physical_av64 +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; CHECK-LABEL: name: ds_write_b64__physical_av64 + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3) + %0:vgpr_32 = COPY $vgpr0 + %1:av_64_align2 = COPY $vgpr2_vgpr3 + DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3) + DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3) + + +... + +--- +name: ds_write_b64__av64__physical +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; CHECK-LABEL: name: ds_write_b64__av64__physical + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3) + %0:vgpr_32 = COPY $vgpr0 + %2:av_64_align2 = COPY $vgpr4_vgpr5 + DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3) + DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3) \ No newline at end of file From 181d98fe7134480e93384f7055ef9bad24e29800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?= <125479151+niconunezz@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:08:51 +0200 Subject: [PATCH 5/8] Update llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir Co-authored-by: Matt Arsenault --- .../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 4a142f1c1dca2..b3d0fc8b50d38 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -286,4 +286,6 @@ body: | %0:vgpr_32 = COPY $vgpr0 %2:av_64_align2 = COPY $vgpr4_vgpr5 DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3) - DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3) \ No newline at end of file + DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3) + ... + \ No newline at end of file From dd4252e237e4a270fde7e90760a39da1ecd8182e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?= <125479151+niconunezz@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:09:15 +0200 Subject: [PATCH 6/8] Update llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir Co-authored-by: Matt Arsenault --- .../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index b3d0fc8b50d38..7fa762b181568 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -266,6 +266,7 @@ body: | %1:av_64_align2 = COPY $vgpr2_vgpr3 DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3) DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3) +... ... From b7a84e426e787df93f0875a4f9e7eeb0d52efc75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?= <125479151+niconunezz@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:09:25 +0200 Subject: [PATCH 7/8] Update llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir Co-authored-by: Matt Arsenault --- .../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index 7fa762b181568..b29fe4c8e7dc3 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -248,7 +248,6 @@ body: | DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) ... - --- name: ds_write_b64__physical_av64 body: | From 06c1b3eae06a9f92afcb2d119909f5a9705ceaae Mon Sep 17 00:00:00 2001 From: niconunezz Date: Fri, 10 Oct 2025 21:31:56 +0200 Subject: [PATCH 8/8] addressed suggested changes --- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 3 + .../load-store-opt-ds-regclass-constrain.mir | 66 ++++--------------- 2 files changed, 15 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 4df78b9e946da..57d3eadfcc5ef 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1352,6 +1352,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI, DataRC1, SubReg); } + if (Data0->getReg().isPhysical() || Data1->getReg().isPhysical()) { + return nullptr; + } bool canBeConstrainedData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0); bool canBeConstrainedData1 = diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir index b29fe4c8e7dc3..34efcadc2951d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir @@ -217,18 +217,17 @@ body: | ; CHECK-LABEL: name: ds_write_b32__av32_physical ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3) + ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: %1:av_32 = COPY $vgpr1 + ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3) + ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3) + %0:vgpr_32 = COPY $vgpr0 %1:av_32 = COPY $vgpr1 DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3) DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3) ... -... - --- name: ds_write_b32__physical_av32 body: | @@ -238,54 +237,13 @@ body: | ; CHECK-LABEL: name: ds_write_b32__physical_av32 ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3) - %0:vgpr_32 = COPY $vgpr0 - %2:av_32 = COPY $vgpr2 - DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) - DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3) -... - ---- -name: ds_write_b64__physical_av64 -body: | - bb.0: - liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: %1:av_32 = COPY $vgpr2 + ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) + ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3) - ; CHECK-LABEL: name: ds_write_b64__physical_av64 - ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3) %0:vgpr_32 = COPY $vgpr0 - %1:av_64_align2 = COPY $vgpr2_vgpr3 - DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3) - DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3) -... - - + %1:av_32 = COPY $vgpr2 + DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3) ... - ---- -name: ds_write_b64__av64__physical -body: | - bb.0: - liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 - - ; CHECK-LABEL: name: ds_write_b64__av64__physical - ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3) - %0:vgpr_32 = COPY $vgpr0 - %2:av_64_align2 = COPY $vgpr4_vgpr5 - DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3) - DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3) - ... - \ No newline at end of file