Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,11 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo <
let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
}

let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
let isCommutable = 1, Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32",
[(set i32:$sdst, (UniformUnaryFrag<abs> (sub_oneuse i32:$src0, i32:$src1)))]
>;
} // End isCommutable = 1, Defs = [SCC]

let SubtargetPredicate = isGFX8GFX9 in {
def S_RFE_RESTORE_B64 : SOP2_Pseudo <
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/AMDGPU/absdiff.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also test the promoted 16-bit (i16) case.

Copy link
Contributor Author

@LU-JOHN LU-JOHN Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added 16-bit test case.

define amdgpu_ps i32 @absdiff_v1(i32 inreg %arg, i32 inreg %arg2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also test vector types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested <2 x i32> and <4 x i32>

; CHECK-LABEL: absdiff_v1:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
; CHECK-NEXT: ; return to shader part epilog
%diff = sub i32 %arg, %arg2
%res = call i32 @llvm.abs.i32(i32 %diff, i1 false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also test with the second `llvm.abs` argument (`is_int_min_poison`) set to `true`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added test with true variant.

ret i32 %res
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a negative test for the multi-use case, where the `sub` result has more than one use?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added negative test for multi-use case.

25 changes: 20 additions & 5 deletions llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,21 @@ define amdgpu_ps i32 @abs32(i32 inreg %val0) {
ret i32 %zext
}

define amdgpu_ps i32 @absdiff32(i32 inreg %val0, i32 inreg %val1) {
; CHECK-LABEL: absdiff32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
Comment on lines +117 to +119
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Codegen is terrible here, but that's not your fault. Should be:

Suggested change
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_cselect_b32 s0, 1, 0

; CHECK-NEXT: ; return to shader part epilog
%diff = sub i32 %val0, %val1
%result = call i32 @llvm.abs.i32(i32 %diff, i1 false)
%cmp = icmp ne i32 %result, 0
%zext = zext i1 %cmp to i32
ret i32 %zext
}

define amdgpu_ps i32 @and32(i32 inreg %val0, i32 inreg %val1) {
; CHECK-LABEL: and32:
; CHECK: ; %bb.0:
Expand Down Expand Up @@ -608,14 +623,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
; CHECK-NEXT: s_cbranch_scc0 .LBB35_2
; CHECK-NEXT: s_cbranch_scc0 .LBB36_2
; CHECK-NEXT: ; %bb.1: ; %endif
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: s_branch .LBB35_3
; CHECK-NEXT: .LBB35_2: ; %if
; CHECK-NEXT: s_branch .LBB36_3
; CHECK-NEXT: .LBB36_2: ; %if
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_branch .LBB35_3
; CHECK-NEXT: .LBB35_3:
; CHECK-NEXT: s_branch .LBB36_3
; CHECK-NEXT: .LBB36_3:
%cmp = icmp ne ptr addrspace(4) @1, null
br i1 %cmp, label %endif, label %if

Expand Down
Loading