Skip to content

Commit 08484d9

Browse files
authored
[AMDGPU] 32-bit abs is not legal on R600 (#164103)
Fix bug introduced in #163907. 32-bit abs is not legal on R600. --------- Signed-off-by: John Lu <[email protected]>
1 parent 5a20b72 commit 08484d9

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
4545
// Legalize loads and stores to the private address space.
4646
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
4747

48+
// 32-bit ABS is legal on the other AMDGPU targets but not on R600, so expand it.
49+
setOperationAction(ISD::ABS, MVT::i32, Expand);
50+
4851
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
4952
// spaces, so it is custom lowered to handle those where it isn't.
5053
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
3+
; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600 %s
4+
5+
; Computes the absolute value of %arg with the @llvm.abs.i32 intrinsic (i1 flag
; set to false) and stores the result to %out. The autogenerated checks below
; expect s_abs_i32 for gfx900 and a SUB_INT followed by MAX_INT for r600/cypress.
define amdgpu_kernel void @abs_v1(ptr addrspace(1) %out, i32 %arg) {
6+
; GFX9-LABEL: abs_v1:
7+
; GFX9: ; %bb.0:
8+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
9+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
10+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
11+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
12+
; GFX9-NEXT: s_abs_i32 s2, s2
13+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
14+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
15+
; GFX9-NEXT: s_endpgm
16+
;
17+
; R600-LABEL: abs_v1:
18+
; R600: ; %bb.0:
19+
; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
20+
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
21+
; R600-NEXT: CF_END
22+
; R600-NEXT: PAD
23+
; R600-NEXT: ALU clause starting at 4:
24+
; R600-NEXT: MOV * T0.W, KC0[2].Z,
25+
; R600-NEXT: SUB_INT * T1.W, 0.0, PV.W,
26+
; R600-NEXT: MAX_INT T0.X, T0.W, PV.W,
27+
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
28+
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
29+
%res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
30+
store i32 %res, ptr addrspace(1) %out, align 4
31+
ret void
32+
}
33+
34+
; Computes the absolute value of %arg without the intrinsic, as
; select(%arg > (0 - %arg), %arg, 0 - %arg) using a signed compare, and stores
; the result to %out. The autogenerated checks below expect s_abs_i32 for gfx900
; and a SUB_INT followed by MAX_INT for r600/cypress.
define amdgpu_kernel void @abs_v2(ptr addrspace(1) %out, i32 %arg) {
35+
; GFX9-LABEL: abs_v2:
36+
; GFX9: ; %bb.0:
37+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
38+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
39+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
40+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
41+
; GFX9-NEXT: s_abs_i32 s2, s2
42+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
43+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
44+
; GFX9-NEXT: s_endpgm
45+
;
46+
; R600-LABEL: abs_v2:
47+
; R600: ; %bb.0:
48+
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
49+
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
50+
; R600-NEXT: CF_END
51+
; R600-NEXT: PAD
52+
; R600-NEXT: ALU clause starting at 4:
53+
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
54+
; R600-NEXT: MAX_INT T0.X, KC0[2].Z, PV.W,
55+
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
56+
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
57+
%neg = sub i32 0, %arg
58+
%cond = icmp sgt i32 %arg, %neg
59+
%res = select i1 %cond, i32 %arg, i32 %neg
60+
store i32 %res, ptr addrspace(1) %out, align 4
61+
ret void
62+
}
63+
64+
; Same open-coded absolute value as a sub/icmp sgt/select sequence, but with the
; compare and select operands commuted: select((0 - %arg) > %arg, 0 - %arg, %arg).
; The result is stored to %out. The autogenerated checks below expect s_abs_i32
; for gfx900 and a SUB_INT followed by MAX_INT for r600/cypress.
define amdgpu_kernel void @abs_v3(ptr addrspace(1) %out, i32 %arg) {
65+
; GFX9-LABEL: abs_v3:
66+
; GFX9: ; %bb.0:
67+
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
68+
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
69+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
70+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
71+
; GFX9-NEXT: s_abs_i32 s2, s2
72+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
73+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
74+
; GFX9-NEXT: s_endpgm
75+
;
76+
; R600-LABEL: abs_v3:
77+
; R600: ; %bb.0:
78+
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
79+
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
80+
; R600-NEXT: CF_END
81+
; R600-NEXT: PAD
82+
; R600-NEXT: ALU clause starting at 4:
83+
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
84+
; R600-NEXT: MAX_INT T0.X, PV.W, KC0[2].Z,
85+
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
86+
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
87+
%neg = sub i32 0, %arg
88+
%cond = icmp sgt i32 %neg, %arg
89+
%res = select i1 %cond, i32 %neg, i32 %arg
90+
store i32 %res, ptr addrspace(1) %out, align 4
91+
ret void
92+
}

0 commit comments

Comments
 (0)