Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
let isMoveImm = 1;
let SchedRW = [Write64Bit];
let Size = 4;
let VOP1 = 1; // Not entirely correct, but close enough.
let UseNamedOperandTable = 1;
}

Expand Down
22 changes: 21 additions & 1 deletion llvm/test/CodeGen/AMDGPU/remat-sop.mir
Original file line number Diff line number Diff line change
Expand Up @@ -653,4 +653,24 @@ body: |
S_ENDPGM 0
...


---
name: test_remat_s_mov_b64_imm_pseudo
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_s_mov_b64_imm_pseudo
; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64_IMM_PSEUDO 1
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_MOV_B64_IMM_PSEUDO 2
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64_IMM_PSEUDO 3
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
; GCN-NEXT: S_ENDPGM 0
%0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 1
%1:sgpr_64 = S_MOV_B64_IMM_PSEUDO 2
%2:sgpr_64 = S_MOV_B64_IMM_PSEUDO 3
S_NOP 0, implicit %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_ENDPGM 0
...
78 changes: 34 additions & 44 deletions llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7]
; GLOBALNESS1-NEXT: s_load_dwordx4 s[76:79], s[8:9], 0x0
; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], 0, 0
; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[76:77]
; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5]
Expand All @@ -46,6 +45,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17
; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400
Expand Down Expand Up @@ -73,13 +73,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3
; GLOBALNESS1-NEXT: v_mov_b32_e32 v46, 0x80
; GLOBALNESS1-NEXT: s_mov_b32 s70, s16
; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9]
; GLOBALNESS1-NEXT: s_mov_b32 s71, s15
; GLOBALNESS1-NEXT: s_mov_b32 s72, s14
; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11]
; GLOBALNESS1-NEXT: v_mov_b32_e32 v47, 0
; GLOBALNESS1-NEXT: s_mov_b32 s32, 0
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56_vgpr57
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
Expand All @@ -106,17 +108,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30
; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1]
; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47]
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1]
; GLOBALNESS1-NEXT: flat_load_dword v58, v[46:47]
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
Expand Down Expand Up @@ -160,8 +160,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS1-NEXT: flat_load_dword v0, v[44:45]
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
Expand All @@ -170,17 +169,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13
; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58
; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
Expand Down Expand Up @@ -237,7 +235,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[68:69]
Expand All @@ -246,14 +243,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[56:57], off
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[64:65]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[42:43], off
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_14
; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
Expand All @@ -274,14 +271,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_1
; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS1-NEXT: s_branch .LBB1_2
; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
Expand Down Expand Up @@ -326,10 +321,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7]
; GLOBALNESS0-NEXT: s_load_dwordx4 s[72:75], s[8:9], 0x0
; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], 0, 0
; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[72:73]
; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[4:5]
Expand All @@ -338,6 +332,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17
; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400
Expand Down Expand Up @@ -365,13 +360,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3
; GLOBALNESS0-NEXT: v_mov_b32_e32 v46, 0x80
; GLOBALNESS0-NEXT: s_mov_b32 s68, s16
; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9]
; GLOBALNESS0-NEXT: s_mov_b32 s69, s15
; GLOBALNESS0-NEXT: s_mov_b32 s70, s14
; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11]
; GLOBALNESS0-NEXT: v_mov_b32_e32 v47, 0
; GLOBALNESS0-NEXT: s_mov_b32 s32, 0
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56_vgpr57
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
Expand All @@ -398,17 +395,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30
; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1]
; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47]
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1]
; GLOBALNESS0-NEXT: flat_load_dword v58, v[46:47]
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
Expand Down Expand Up @@ -452,8 +447,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS0-NEXT: flat_load_dword v0, v[44:45]
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
Expand All @@ -462,17 +456,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[44:45], off
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13
; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58
; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
Expand Down Expand Up @@ -529,7 +522,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[72:73]
Expand All @@ -538,14 +530,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[56:57], off
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[64:65]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[42:43], off
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_14
; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
Expand All @@ -566,14 +558,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_1
; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
; GLOBALNESS0-NEXT: s_branch .LBB1_2
; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
Expand Down
48 changes: 46 additions & 2 deletions llvm/test/CodeGen/AMDGPU/vgpr-remat.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
# Check that we get two move-immediates into %1 and %2, instead of a copy from
# %1 to %2, because that would introduce a dependency and maybe a stall.
---
name: f
name: remat_v_mov_b32_e32
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: f
; CHECK-LABEL: name: remat_v_mov_b32_e32
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
Expand Down Expand Up @@ -46,3 +46,47 @@ body: |
%4.sub1:vreg_96 = COPY %2:vgpr_32
S_ENDPGM 0, implicit %4
...

---
name: remat_v_mov_b64_pseduo
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo "pseduo"

tracksRegLiveness: true
body: |
; CHECK-LABEL: name: remat_v_mov_b64_pseduo
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[V_MOV_B:%[0-9]+]].sub0_sub1:vreg_192_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]].sub2_sub3:vreg_192_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: $exec = S_MOV_B64_term [[COPY]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]].sub0_sub1:vreg_192_align2 = V_MUL_F64_e64 0, [[V_MOV_B]].sub0_sub1, 0, [[V_MOV_B]].sub0_sub1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]].sub2_sub3:vreg_192_align2 = V_MUL_F64_e64 0, [[V_MOV_B]].sub2_sub3, 0, [[V_MOV_B]].sub2_sub3, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
bb.0:
liveins: $sgpr0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
%1:vreg_64_align2 = COPY %0:vreg_64_align2
%2:vreg_64_align2 = COPY %0:vreg_64_align2
%3:sreg_64 = COPY $sgpr0_sgpr1
$exec = S_MOV_B64_term %3:sreg_64
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1

bb.1:
%1:vreg_64_align2 = V_MUL_F64_e64 0, %1:vreg_64_align2, 0, %1:vreg_64_align2, 0, 0, implicit $mode, implicit $exec
%2:vreg_64_align2 = V_MUL_F64_e64 0, %2:vreg_64_align2, 0, %2:vreg_64_align2, 0, 0, implicit $mode, implicit $exec

bb.2:
undef %4.sub0_sub1:vreg_192 = COPY %1:vreg_64_align2
%4.sub2_sub3:vreg_192 = COPY %2:vreg_64_align2
S_ENDPGM 0, implicit %4
...