Skip to content

Commit a371ba3

Browse files
committed
Generate s_absdiff_i32
Signed-off-by: John Lu <[email protected]>
1 parent 9abbec6 commit a371ba3

File tree

3 files changed

+37
-8
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -838,9 +838,11 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo <
838838
let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
839839
}
840840

841-
let Defs = [SCC] in {
842-
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
843-
} // End Defs = [SCC]
841+
// Scalar absolute difference. The selection pattern below maps the abs of a
// subtraction of $src0 and $src1 onto this instruction, writing $sdst.
// NOTE(review): going by the fragment names, this presumably matches only
// uniform values whose sub has a single use -- confirm against the
// UniformUnaryFrag / sub_oneuse definitions.
// Swapping the operands only negates the difference, which abs discards, so
// the instruction is flagged commutable. SCC is listed as a def.
let isCommutable = 1, Defs = [SCC] in {
842+
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32",
843+
[(set i32:$sdst, (UniformUnaryFrag<abs> (sub_oneuse i32:$src0, i32:$src1)))]
844+
>;
845+
} // End isCommutable = 1, Defs = [SCC]
844846

845847
let SubtargetPredicate = isGFX8GFX9 in {
846848
def S_RFE_RESTORE_B64 : SOP2_Pseudo <
(newly added test file; its path is not shown in this capture)

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Checks that llvm.abs of (%arg - %arg2), with both i32 arguments passed inreg,
; is emitted as a single s_absdiff_i32 (see the CHECK lines below).
define amdgpu_ps i32 @absdiff_v1(i32 inreg %arg, i32 inreg %arg2) {
5+
; CHECK-LABEL: absdiff_v1:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
8+
; CHECK-NEXT: ; return to shader part epilog
9+
%diff = sub i32 %arg, %arg2
10+
%res = call i32 @llvm.abs.i32(i32 %diff, i1 false)
11+
ret i32 %res
12+
}

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,21 @@ define amdgpu_ps i32 @abs32(i32 inreg %val0) {
110110
ret i32 %zext
111111
}
112112

113+
; Checks that comparing the abs-difference result against zero needs no
; separate compare instruction: in the expected output, s_cselect_b64 follows
; s_absdiff_i32 directly.
; NOTE(review): this presumably relies on s_absdiff_i32 setting SCC when its
; result is non-zero -- confirm against the ISA documentation.
define amdgpu_ps i32 @absdiff32(i32 inreg %val0, i32 inreg %val1) {
114+
; CHECK-LABEL: absdiff32:
115+
; CHECK: ; %bb.0:
116+
; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
117+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
118+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
119+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
120+
; CHECK-NEXT: ; return to shader part epilog
121+
%diff = sub i32 %val0, %val1
122+
%result = call i32 @llvm.abs.i32(i32 %diff, i1 false)
123+
%cmp = icmp ne i32 %result, 0
124+
%zext = zext i1 %cmp to i32
125+
ret i32 %zext
126+
}
127+
113128
define amdgpu_ps i32 @and32(i32 inreg %val0, i32 inreg %val1) {
114129
; CHECK-LABEL: and32:
115130
; CHECK: ; %bb.0:
@@ -608,14 +623,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
608623
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
609624
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
610625
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
611-
; CHECK-NEXT: s_cbranch_scc0 .LBB35_2
626+
; CHECK-NEXT: s_cbranch_scc0 .LBB36_2
612627
; CHECK-NEXT: ; %bb.1: ; %endif
613628
; CHECK-NEXT: s_mov_b32 s0, 1
614-
; CHECK-NEXT: s_branch .LBB35_3
615-
; CHECK-NEXT: .LBB35_2: ; %if
629+
; CHECK-NEXT: s_branch .LBB36_3
630+
; CHECK-NEXT: .LBB36_2: ; %if
616631
; CHECK-NEXT: s_mov_b32 s0, 0
617-
; CHECK-NEXT: s_branch .LBB35_3
618-
; CHECK-NEXT: .LBB35_3:
632+
; CHECK-NEXT: s_branch .LBB36_3
633+
; CHECK-NEXT: .LBB36_3:
619634
%cmp = icmp ne ptr addrspace(4) @1, null
620635
br i1 %cmp, label %endif, label %if
621636

0 commit comments

Comments
 (0)