Skip to content

Commit 16729b5

Browse files
committed
Use named values and generated checks in sdwa-peephole-instr-combine-sel.ll
1 parent 7839be8 commit 16729b5

File tree

1 file changed

+54 -44 lines changed

1 file changed

+54 -44 lines changed

llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll

Lines changed: 54 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,11 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - < %s | FileCheck -check-prefix=CHECK %s
3+
; ModuleID = 'llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll'
4+
source_filename = "llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll"
25

3-
; CHECK-NOT: v_lshlrev_b32_sdwa v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4-
5-
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
6-
target triple = "amdgcn-amd-amdhsa"
7-
8-
define amdgpu_kernel void @kernel(ptr addrspace(1) %input.coerce, i32 %0, i1 %cmp3.i, i32 %add5.1, ptr addrspace(3) %1, ptr addrspace(3) %2) {
9-
; CHECK-LABEL: kernel:
10-
; CHECK-NEXT: ; %bb.0: ; %entry
6+
define amdgpu_kernel void @bar(ptr addrspace(1) %arg3, i32 %arg, i1 %arg4, i32 %arg5, ptr addrspace(3) %arg6, ptr addrspace(3) %arg7) {
7+
; CHECK-LABEL: bar:
8+
; CHECK: ; %bb.0: ; %bb
119
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1210
; CHECK-NEXT: v_mov_b32_e32 v2, 8
1311
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -24,59 +22,71 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %input.coerce, i32 %0, i1 %cm
2422
; CHECK-NEXT: s_waitcnt vmcnt(0)
2523
; CHECK-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2624
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
27-
; CHECK-NEXT: ; %bb.1: ; %if.then.i
25+
; CHECK-NEXT: ; %bb.1: ; %bb23
2826
; CHECK-NEXT: v_mov_b32_e32 v1, 0
2927
; CHECK-NEXT: ds_write_b32 v1, v1
30-
; CHECK-NEXT: .LBB0_2: ; %if.end.i
28+
; CHECK-NEXT: .LBB0_2: ; %bb24
3129
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3230
; CHECK-NEXT: s_mov_b32 s3, exec_lo
3331
; CHECK-NEXT: v_cmpx_ne_u16_e32 0, v1
3432
; CHECK-NEXT: s_xor_b32 s3, exec_lo, s3
3533
; CHECK-NEXT: s_cbranch_execz .LBB0_4
36-
; CHECK-NEXT: ; %bb.3: ; %if.then.i.i.i.i.i
34+
; CHECK-NEXT: ; %bb.3: ; %bb15
3735
; CHECK-NEXT: v_mov_b32_e32 v2, 2
3836
; CHECK-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3937
; CHECK-NEXT: v_mov_b32_e32 v2, s2
4038
; CHECK-NEXT: ds_write_b32 v1, v2 offset:84
39+
; CHECK-NEXT: .LBB0_4: ; %bb18
40+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s3
41+
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x14
42+
; CHECK-NEXT: v_bfe_u32 v1, v0, 8, 8
43+
; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0
44+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
45+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
46+
; CHECK-NEXT: v_lshl_add_u32 v1, v1, 2, s2
47+
; CHECK-NEXT: v_lshl_add_u32 v0, v0, 2, s3
48+
; CHECK-NEXT: ds_write_b32 v1, v2
49+
; CHECK-NEXT: ds_write_b32 v0, v2
50+
; CHECK-NEXT: global_store_dword v2, v2, s[0:1]
51+
; CHECK-NEXT: s_endpgm
52+
bb:
53+
%call = tail call i32 @llvm.amdgcn.workitem.id.x()
54+
%zext = zext i32 %call to i64
55+
%getelementptr = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext
56+
%load = load i8, ptr addrspace(1) %getelementptr, align 1
57+
%or = or disjoint i32 %call, 1
58+
%zext8 = zext i32 %or to i64
59+
%getelementptr9 = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext8
60+
%load10 = load i8, ptr addrspace(1) %getelementptr9, align 1
61+
%or11 = or disjoint i32 %call, 2
62+
%zext12 = zext i32 %or11 to i64
63+
%getelementptr13 = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext12
64+
%load14 = load i8, ptr addrspace(1) %getelementptr13, align 1
65+
br i1 %arg4, label %bb23, label %bb24
4166

42-
entry:
43-
%3 = tail call i32 @llvm.amdgcn.workitem.id.x()
44-
%idxprom = zext i32 %3 to i64
45-
%arrayidx = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom
46-
%4 = load i8, ptr addrspace(1) %arrayidx, align 1
47-
%add5.11 = or disjoint i32 %3, 1
48-
%idxprom.1 = zext i32 %add5.11 to i64
49-
%arrayidx.1 = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom.1
50-
%5 = load i8, ptr addrspace(1) %arrayidx.1, align 1
51-
%add5.2 = or disjoint i32 %3, 2
52-
%idxprom.2 = zext i32 %add5.2 to i64
53-
%arrayidx.2 = getelementptr i8, ptr addrspace(1) %input.coerce, i64 %idxprom.2
54-
%6 = load i8, ptr addrspace(1) %arrayidx.2, align 1
55-
br i1 %cmp3.i, label %if.then.i, label %if.end.i
56-
57-
if.then.i.i.i.i.i: ; preds = %if.end.i
58-
%7 = zext i8 %6 to i32
59-
%arrayidx7.i.i.i.i.i = getelementptr nusw [14 x i32], ptr addrspace(3) inttoptr (i32 84 to ptr addrspace(3)), i32 0, i32 %7
60-
store i32 %0, ptr addrspace(3) %arrayidx7.i.i.i.i.i, align 4
61-
br label %func.exit.i.i.i
67+
bb15: ; preds = %bb24
68+
%zext16 = zext i8 %load14 to i32
69+
%getelementptr17 = getelementptr nusw [14 x i32], ptr addrspace(3) inttoptr (i32 84 to ptr addrspace(3)), i32 0, i32 %zext16
70+
store i32 %arg, ptr addrspace(3) %getelementptr17, align 4
71+
br label %bb18
6272

63-
func.exit.i.i.i: ; preds = %if.end.i, %if.then.i.i.i.i.i
64-
%8 = zext i8 %5 to i32
65-
%arrayidx7.i.i.1.i.i.i = getelementptr [14 x i32], ptr addrspace(3) %1, i32 0, i32 %8
66-
store i32 0, ptr addrspace(3) %arrayidx7.i.i.1.i.i.i, align 4
67-
%9 = zext i8 %4 to i32
68-
%arrayidx12.i = getelementptr [14 x i32], ptr addrspace(3) %2, i32 0, i32 %9
69-
store i32 0, ptr addrspace(3) %arrayidx12.i, align 4
70-
store i32 0, ptr addrspace(1) %input.coerce, align 4
73+
bb18: ; preds = %bb24, %bb15
74+
%zext19 = zext i8 %load10 to i32
75+
%getelementptr20 = getelementptr [14 x i32], ptr addrspace(3) %arg6, i32 0, i32 %zext19
76+
store i32 0, ptr addrspace(3) %getelementptr20, align 4
77+
%zext21 = zext i8 %load to i32
78+
%getelementptr22 = getelementptr [14 x i32], ptr addrspace(3) %arg7, i32 0, i32 %zext21
79+
store i32 0, ptr addrspace(3) %getelementptr22, align 4
80+
store i32 0, ptr addrspace(1) %arg3, align 4
7181
ret void
7282

73-
if.then.i: ; preds = %entry
83+
bb23: ; preds = %bb
7484
store i32 0, ptr addrspace(3) null, align 4
75-
br label %if.end.i
85+
br label %bb24
7686

77-
if.end.i: ; preds = %if.then.i, %entry
78-
%cmp.not.i.i.i.i.not.i = icmp eq i8 %6, 0
79-
br i1 %cmp.not.i.i.i.i.not.i, label %func.exit.i.i.i, label %if.then.i.i.i.i.i
87+
bb24: ; preds = %bb23, %bb
88+
%icmp = icmp eq i8 %load14, 0
89+
br i1 %icmp, label %bb18, label %bb15
8090
}
8191

8292
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)

0 commit comments

Comments
 (0)