Skip to content

Commit 69c0a6e

Browse files
committed
remove redundant S_AND nop
1 parent f48a096 commit 69c0a6e

File tree

2 files changed

+5
-21
lines changed

2 files changed

+5
-21
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2765,7 +2765,7 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
27652765
>;
27662766

27672767
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2768-
(S_OR_B32 (S_LSHR_B32 $src1, (S_AND_B32 $src2, (i32 0xffffffff))), (S_LSHL_B32 $src0, (S_SUB_I32 (i32 32), (S_AND_B32 $src2, (i32 0xffffffff)))))
2768+
(S_OR_B32 (S_LSHR_B32 $src1, $src2), (S_LSHL_B32 $src0, (S_SUB_I32 (i32 32), $src2)))
27692769
>;
27702770

27712771
} // end True16Predicate = UseFakeTrue16Insts

llvm/test/CodeGen/AMDGPU/fshr.ll

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,11 @@ define amdgpu_kernel void @fshr_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z
103103
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
104104
; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
105105
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
106-
; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, -1
107-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
108106
; GFX11-FAKE16-NEXT: s_sub_i32 s3, 32, s2
109107
; GFX11-FAKE16-NEXT: s_lshr_b32 s1, s1, s2
110108
; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, s3
109+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
111110
; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0
112-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
113111
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
114112
; GFX11-FAKE16-NEXT: global_store_b32 v0, v1, s[4:5]
115113
; GFX11-FAKE16-NEXT: s_endpgm
@@ -133,13 +131,11 @@ define amdgpu_kernel void @fshr_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z
133131
; GFX12-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
134132
; GFX12-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
135133
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
136-
; GFX12-FAKE16-NEXT: s_and_b32 s2, s2, -1
137-
; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
138134
; GFX12-FAKE16-NEXT: s_sub_co_i32 s3, 32, s2
139135
; GFX12-FAKE16-NEXT: s_lshr_b32 s1, s1, s2
140136
; GFX12-FAKE16-NEXT: s_lshl_b32 s0, s0, s3
137+
; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
141138
; GFX12-FAKE16-NEXT: s_or_b32 s0, s1, s0
142-
; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
143139
; GFX12-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
144140
; GFX12-FAKE16-NEXT: global_store_b32 v0, v1, s[4:5]
145141
; GFX12-FAKE16-NEXT: s_endpgm
@@ -357,11 +353,9 @@ define amdgpu_kernel void @fshr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
357353
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
358354
; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
359355
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
360-
; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, -1
361-
; GFX11-FAKE16-NEXT: s_and_b32 s6, s6, -1
356+
; GFX11-FAKE16-NEXT: s_sub_i32 s8, 32, s6
362357
; GFX11-FAKE16-NEXT: s_lshr_b32 s3, s3, s7
363358
; GFX11-FAKE16-NEXT: s_sub_i32 s7, 32, s7
364-
; GFX11-FAKE16-NEXT: s_sub_i32 s8, 32, s6
365359
; GFX11-FAKE16-NEXT: s_lshr_b32 s2, s2, s6
366360
; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, s8
367361
; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, s7
@@ -396,11 +390,9 @@ define amdgpu_kernel void @fshr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i
396390
; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
397391
; GFX12-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
398392
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
399-
; GFX12-FAKE16-NEXT: s_and_b32 s7, s7, -1
400-
; GFX12-FAKE16-NEXT: s_and_b32 s6, s6, -1
393+
; GFX12-FAKE16-NEXT: s_sub_co_i32 s8, 32, s6
401394
; GFX12-FAKE16-NEXT: s_lshr_b32 s3, s3, s7
402395
; GFX12-FAKE16-NEXT: s_sub_co_i32 s7, 32, s7
403-
; GFX12-FAKE16-NEXT: s_sub_co_i32 s8, 32, s6
404396
; GFX12-FAKE16-NEXT: s_lshr_b32 s2, s2, s6
405397
; GFX12-FAKE16-NEXT: s_lshl_b32 s0, s0, s8
406398
; GFX12-FAKE16-NEXT: s_lshl_b32 s1, s1, s7
@@ -688,10 +680,6 @@ define amdgpu_kernel void @fshr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i
688680
; GFX11-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x34
689681
; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
690682
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
691-
; GFX11-FAKE16-NEXT: s_and_b32 s3, s3, -1
692-
; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, -1
693-
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, -1
694-
; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, -1
695683
; GFX11-FAKE16-NEXT: s_lshr_b32 s6, s15, s3
696684
; GFX11-FAKE16-NEXT: s_sub_i32 s3, 32, s3
697685
; GFX11-FAKE16-NEXT: s_lshr_b32 s7, s14, s2
@@ -743,10 +731,6 @@ define amdgpu_kernel void @fshr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i
743731
; GFX12-FAKE16-NEXT: s_load_b256 s[8:15], s[4:5], 0x34
744732
; GFX12-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
745733
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
746-
; GFX12-FAKE16-NEXT: s_and_b32 s3, s3, -1
747-
; GFX12-FAKE16-NEXT: s_and_b32 s2, s2, -1
748-
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, -1
749-
; GFX12-FAKE16-NEXT: s_and_b32 s0, s0, -1
750734
; GFX12-FAKE16-NEXT: s_lshr_b32 s6, s15, s3
751735
; GFX12-FAKE16-NEXT: s_sub_co_i32 s3, 32, s3
752736
; GFX12-FAKE16-NEXT: s_lshr_b32 s7, s14, s2

0 commit comments

Comments
 (0)