Skip to content

Commit 7368d31

Browse files
authored
GFX13: Fix incorrect shrinking of V_ADD_CO_U32_e64 (#3361)
On GFX13, V_ADD_CO_U32_e64 is a VOP3-only instruction (it has no e32 encoding), so it must not be shrunk to V_ADD_CO_U32_e32. Fixes: SC1-6350
1 parent 1c7fef2 commit 7368d31

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,9 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
877877
// Make sure to get the 32-bit version of the commuted opcode.
878878
unsigned MaybeCommutedOpc = MI->getOpcode();
879879
Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
880+
881+
if (TII->pseudoToMCOpcode(Op32) == -1)
882+
Op32 = -1;
880883
}
881884

882885
appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, /*Commuted=*/true,
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=wavefrontsize32 < %s | FileCheck %s --check-prefixes=GFX12-W32
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=wavefrontsize64 < %s | FileCheck %s --check-prefixes=GFX12-W64
4+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1250 -mattr=wavefrontsize32 < %s | FileCheck %s --check-prefixes=GFX1250
5+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1300 -mattr=wavefrontsize32 < %s | FileCheck %s --check-prefixes=GFX13-W32
6+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1300 -mattr=wavefrontsize64 < %s | FileCheck %s --check-prefixes=GFX13-W64
7+
8+
@sym = external addrspace(4) constant [2 x <3 x i32>]
9+
10+
define amdgpu_gs ptr addrspace(4) @v_add_symbol(i64 %arg) {
11+
; GFX12-W32-LABEL: v_add_symbol:
12+
; GFX12-W32: ; %bb.0:
13+
; GFX12-W32-NEXT: v_lshlrev_b64_e32 v[0:1], 4, v[0:1]
14+
; GFX12-W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
15+
; GFX12-W32-NEXT: v_add_co_u32 v0, vcc_lo, sym@abs32@lo, v0
16+
; GFX12-W32-NEXT: v_add_co_ci_u32_e64 v1, null, sym@abs32@hi, v1, vcc_lo
17+
; GFX12-W32-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
18+
; GFX12-W32-NEXT: v_readfirstlane_b32 s0, v0
19+
; GFX12-W32-NEXT: v_readfirstlane_b32 s1, v1
20+
; GFX12-W32-NEXT: ; return to shader part epilog
21+
;
22+
; GFX12-W64-LABEL: v_add_symbol:
23+
; GFX12-W64: ; %bb.0:
24+
; GFX12-W64-NEXT: v_lshlrev_b64_e32 v[0:1], 4, v[0:1]
25+
; GFX12-W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
26+
; GFX12-W64-NEXT: v_add_co_u32 v0, vcc, sym@abs32@lo, v0
27+
; GFX12-W64-NEXT: v_add_co_ci_u32_e64 v1, null, sym@abs32@hi, v1, vcc
28+
; GFX12-W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
29+
; GFX12-W64-NEXT: v_readfirstlane_b32 s0, v0
30+
; GFX12-W64-NEXT: v_readfirstlane_b32 s1, v1
31+
; GFX12-W64-NEXT: ; return to shader part epilog
32+
;
33+
; GFX1250-LABEL: v_add_symbol:
34+
; GFX1250: ; %bb.0:
35+
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 4, sym@abs64
36+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
37+
; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
38+
; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
39+
; GFX1250-NEXT: ; return to shader part epilog
40+
;
41+
; GFX13-W32-LABEL: v_add_symbol:
42+
; GFX13-W32: ; %bb.0:
43+
; GFX13-W32-NEXT: v_lshlrev_b64_e32 v[0:1], 4, v[0:1]
44+
; GFX13-W32-NEXT: s_mov_b64 s[0:1], sym@abs64
45+
; GFX13-W32-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
46+
; GFX13-W32-NEXT: v_add_co_u32 v0, vcc_lo, v0, sym@abs64
47+
; GFX13-W32-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc_lo
48+
; GFX13-W32-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
49+
; GFX13-W32-NEXT: v_readfirstlane_b32 s0, v0
50+
; GFX13-W32-NEXT: v_readfirstlane_b32 s1, v1
51+
; GFX13-W32-NEXT: ; return to shader part epilog
52+
;
53+
; GFX13-W64-LABEL: v_add_symbol:
54+
; GFX13-W64: ; %bb.0:
55+
; GFX13-W64-NEXT: v_lshlrev_b64_e32 v[0:1], 4, v[0:1]
56+
; GFX13-W64-NEXT: s_mov_b64 s[0:1], sym@abs64
57+
; GFX13-W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
58+
; GFX13-W64-NEXT: v_add_co_u32 v0, vcc, v0, sym@abs64
59+
; GFX13-W64-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
60+
; GFX13-W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
61+
; GFX13-W64-NEXT: v_readfirstlane_b32 s0, v0
62+
; GFX13-W64-NEXT: v_readfirstlane_b32 s1, v1
63+
; GFX13-W64-NEXT: ; return to shader part epilog
64+
%i = getelementptr [2 x <3 x i32>], ptr addrspace(4) @sym, i64 0, i64 %arg
65+
ret ptr addrspace(4) %i
66+
}

0 commit comments

Comments
 (0)