Skip to content

Commit 70c627c

Browse files
author
Salinas, David
authored
fixes the error in rocfft and hipfft
Amd/dev/dsalinas/rocm70 promo cp row 30 (llvm#1761)
2 parents 9c59fa1 + 60bf26c commit 70c627c

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2420,11 +2420,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
24202420
assert(ST.useVGPRIndexMode());
24212421
Register VecReg = MI.getOperand(0).getReg();
24222422
bool IsUndef = MI.getOperand(1).isUndef();
2423-
Register Idx = MI.getOperand(3).getReg();
2423+
MachineOperand &Idx = MI.getOperand(3);
24242424
Register SubReg = MI.getOperand(4).getImm();
24252425

24262426
MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
2427-
.addReg(Idx)
2427+
.add(Idx)
24282428
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
24292429
SetOn->getOperand(3).setIsUndef();
24302430

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a | FileCheck %s
3+
4+
define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
5+
; CHECK-LABEL: copy_to_reg_frameindex:
6+
; CHECK: ; %bb.0: ; %entry
7+
; CHECK-NEXT: ; implicit-def: $vgpr0
8+
; CHECK-NEXT: .LBB0_1: ; %loop
9+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
10+
; CHECK-NEXT: s_cmp_lt_u32 0, 16
11+
; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
12+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
13+
; CHECK-NEXT: s_set_gpr_idx_off
14+
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
15+
; CHECK-NEXT: ; %bb.2: ; %done
16+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
17+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
18+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
19+
; CHECK-NEXT: global_store_dword v1, v0, s[0:1]
20+
; CHECK-NEXT: s_endpgm
21+
entry:
22+
%B = srem i32 %c, -1
23+
br label %loop
24+
25+
loop:
26+
%promotealloca = phi <16 x i32> [ undef, %entry ], [ %0, %loop ]
27+
%inc = phi i32 [ 0, %entry ], [ %inc.i, %loop ]
28+
%0 = insertelement <16 x i32> %promotealloca, i32 %inc, i32 %inc
29+
%inc.i = add i32 %inc, %B
30+
%cnd = icmp uge i32 %inc.i, 16
31+
br i1 %cnd, label %done, label %loop
32+
33+
done:
34+
%1 = extractelement <16 x i32> %0, i32 0
35+
store i32 %1, ptr addrspace(1) %out, align 4
36+
ret void
37+
}

0 commit comments

Comments
 (0)