Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,7 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
case AMDGPU::S_MOV_B64:
case AMDGPU::V_MOV_B64_e32:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
case AMDGPU::V_MOV_B64_PSEUDO: {
const MachineOperand &Src0 = MI.getOperand(1);
Expand Down Expand Up @@ -2186,7 +2187,13 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
&AMDGPU::SReg_32_XM0RegClass);
break;

case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
bool IsAGPR = SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst));
MI.setDesc(
get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
break;
}
case AMDGPU::V_MOV_B64_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
Expand Down Expand Up @@ -3423,6 +3430,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
return true;
default:
return false;
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,25 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
let UseNamedOperandTable = 1;
}

// 32-bit materialize immediate which supports AGPR or VGPR. Typically
// this should just expand to V_MOV_B32, unless $vdst happens to be
// allocated to an AGPR in which case it will lower to
// V_ACCVGPR_WRITE_B32. This should always use an inline immediate
// operand, as v_accvgpr_write_b32 does not support literal constants.
def AV_MOV_B32_IMM_PSEUDO
: VPseudoInstSI<(outs AV_32:$vdst), (ins VCSrc_b32:$src0)> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you create an AVDst_32 for the dst type? I'm trying to use a RegisterOperand instead of a RegisterClass for all operands in instruction definitions, because it makes it easier to swap out the underlying RegisterClass.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a pseudo, this doesn't really benefit from RegisterOperand. For example, this will now require dead code in the disassembler, since AVDstOperand will emit the custom decoder method.

let isReMaterializable = 1;
let isAsCheapAsAMove = 1;

// Imprecise: technically this is VOP3 if $vdst is an AGPR and VOP1 if it is
// a VGPR. But this tricks the rematerialization logic into working for it.
let VOP3 = 1;
let isMoveImm = 1;
let SchedRW = [Write32Bit];
let Size = 4;
let UseNamedOperandTable = 1;
}

// 64-bit vector move with dpp. Expanded post-RA.
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=postrapseudos %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=postrapseudos %s -o - | FileCheck %s

---
name: av_mov_b32_imm_pseudo_agpr_0
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_0
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
$agpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
...

---
name: av_mov_b32_imm_pseudo_agpr_64
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_64
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
$agpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
...

---
name: av_mov_b32_imm_pseudo_vgpr_0
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_0
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
...

---
name: av_mov_b32_imm_pseudo_vgpr_64
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_64
; CHECK: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
...

---
name: av_mov_b32_imm_pseudo_agpr_vgpr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_vgpr
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
$agpr1 = AV_MOV_B32_IMM_PSEUDO $vgpr0, implicit $exec
...
125 changes: 125 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,128 @@ body: |
S_ENDPGM 0

...

---
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
$agpr0 = COPY %0
S_ENDPGM 0, implicit $agpr0

...

---
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
$vgpr0 = COPY %0
S_ENDPGM 0, implicit $vgpr0

...

---
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
%1:agpr_32 = COPY %0
S_ENDPGM 0, implicit %1

...

---
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
%1:vgpr_32 = COPY %0
S_ENDPGM 0, implicit %1

...

---
name: v_mov_b32_imm_literal_copy_v_to_agpr_32
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
; GCN-NEXT: $agpr0 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
%1:agpr_32 = COPY %0
$agpr0 = COPY %1
S_ENDPGM 0

...

# FIXME: Register class restrictions of AV registers are not respected
# (issue 130020), so the tests below are commented out.

# ---
# name: s_mov_b32_inlineimm_copy_s_to_av_32
# tracksRegLiveness: true
# body: |
# bb.0:
# %0:sreg_32 = S_MOV_B32 32
# %1:av_32 = COPY %0
# $agpr0 = COPY %1
# S_ENDPGM 0

# ...

# ---
# name: v_mov_b32_inlineimm_copy_v_to_av_32
# tracksRegLiveness: true
# body: |
# bb.0:
# %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
# %1:av_32 = COPY %0
# $agpr0 = COPY %1
# S_ENDPGM 0
# ...

# ---
# name: s_mov_b32_imm_literal_copy_s_to_av_32
# tracksRegLiveness: true
# body: |
# bb.0:
# %0:sreg_32 = S_MOV_B32 999
# %1:av_32 = COPY %0
# $agpr0 = COPY %1
# S_ENDPGM 0

# ...

# ---
# name: v_mov_b32_imm_literal_copy_v_to_av_32
# tracksRegLiveness: true
# body: |
# bb.0:
# %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
# %1:av_32 = COPY %0
# $agpr0 = COPY %1
# S_ENDPGM 0

# ...
113 changes: 113 additions & 0 deletions llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=3 -stop-after=postrapseudos -o - -verify-regalloc %s | FileCheck %s

# Compare results of using V_MOV_B32 vs. AV_MOV_B32_IMM_PSEUDO during
# allocation.

---
name: av_mov_b32_split
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
stackPtrOffsetReg: '$sgpr32'
occupancy: 7
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5

; CHECK-LABEL: name: av_mov_b32_split
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
; CHECK-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 4, implicit $exec
; CHECK-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
%0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
%1:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
%2:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
%3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 3, implicit $exec
%4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 4, implicit $exec

%5:agpr_32 = COPY %0
%6:agpr_32 = COPY %1
%7:agpr_32 = COPY %2
%8:agpr_32 = COPY %3
%9:agpr_32 = COPY %4

S_NOP 0, implicit %5
S_NOP 0, implicit %6
S_NOP 0, implicit %7
S_NOP 0, implicit %8
S_NOP 0, implicit %9

...

---
name: v_mov_b32_split
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
stackPtrOffsetReg: '$sgpr32'
occupancy: 7
body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5

; CHECK-LABEL: name: v_mov_b32_split
; CHECK: liveins: $vgpr0, $vgpr3, $vgpr4, $vgpr5, $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec
; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 4, implicit $exec
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec

%5:agpr_32 = COPY %0
%6:agpr_32 = COPY %1
%7:agpr_32 = COPY %2
%8:agpr_32 = COPY %3
%9:agpr_32 = COPY %4

S_NOP 0, implicit %5
S_NOP 0, implicit %6
S_NOP 0, implicit %7
S_NOP 0, implicit %8
S_NOP 0, implicit %9

...

Loading