1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - < %s | FileCheck -check-prefix=CHECK %s
3+ ; ModuleID = 'llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll'
4+ source_filename = "llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll"
25
3- ; CHECK-NOT: v_lshlrev_b32_sdwa v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4-
5- target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
6- target triple = "amdgcn-amd-amdhsa"
7-
8- define amdgpu_kernel void @kernel (ptr addrspace (1 ) %input.coerce , i32 %0 , i1 %cmp3.i , i32 %add5.1 , ptr addrspace (3 ) %1 , ptr addrspace (3 ) %2 ) {
9- ; CHECK-LABEL: kernel:
10- ; CHECK-NEXT: ; %bb.0: ; %entry
6+ define amdgpu_kernel void @bar (ptr addrspace (1 ) %arg3 , i32 %arg , i1 %arg4 , i32 %arg5 , ptr addrspace (3 ) %arg6 , ptr addrspace (3 ) %arg7 ) {
7+ ; CHECK-LABEL: bar:
8+ ; CHECK: ; %bb.0: ; %bb
119; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1210; CHECK-NEXT: v_mov_b32_e32 v2, 8
1311; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -24,59 +22,71 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %input.coerce, i32 %0, i1 %cm
2422; CHECK-NEXT: s_waitcnt vmcnt(0)
2523; CHECK-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2624; CHECK-NEXT: s_cbranch_vccz .LBB0_2
27- ; CHECK-NEXT: ; %bb.1: ; %if.then.i
25+ ; CHECK-NEXT: ; %bb.1: ; %bb23
2826; CHECK-NEXT: v_mov_b32_e32 v1, 0
2927; CHECK-NEXT: ds_write_b32 v1, v1
30- ; CHECK-NEXT: .LBB0_2: ; %if.end.i
28+ ; CHECK-NEXT: .LBB0_2: ; %bb24
3129; CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3230; CHECK-NEXT: s_mov_b32 s3, exec_lo
3331; CHECK-NEXT: v_cmpx_ne_u16_e32 0, v1
3432; CHECK-NEXT: s_xor_b32 s3, exec_lo, s3
3533; CHECK-NEXT: s_cbranch_execz .LBB0_4
36- ; CHECK-NEXT: ; %bb.3: ; %if.then.i.i.i.i.i
34+ ; CHECK-NEXT: ; %bb.3: ; %bb15
3735; CHECK-NEXT: v_mov_b32_e32 v2, 2
3836; CHECK-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3937; CHECK-NEXT: v_mov_b32_e32 v2, s2
4038; CHECK-NEXT: ds_write_b32 v1, v2 offset:84
39+ ; CHECK-NEXT: .LBB0_4: ; %bb18
40+ ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s3
41+ ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x14
42+ ; CHECK-NEXT: v_bfe_u32 v1, v0, 8, 8
43+ ; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0
44+ ; CHECK-NEXT: v_mov_b32_e32 v2, 0
45+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
46+ ; CHECK-NEXT: v_lshl_add_u32 v1, v1, 2, s2
47+ ; CHECK-NEXT: v_lshl_add_u32 v0, v0, 2, s3
48+ ; CHECK-NEXT: ds_write_b32 v1, v2
49+ ; CHECK-NEXT: ds_write_b32 v0, v2
50+ ; CHECK-NEXT: global_store_dword v2, v2, s[0:1]
51+ ; CHECK-NEXT: s_endpgm
52+ bb:
53+ %call = tail call i32 @llvm.amdgcn.workitem.id.x ()
54+ %zext = zext i32 %call to i64
55+ %getelementptr = getelementptr i8 , ptr addrspace (1 ) %arg3 , i64 %zext
56+ %load = load i8 , ptr addrspace (1 ) %getelementptr , align 1
57+ %or = or disjoint i32 %call , 1
58+ %zext8 = zext i32 %or to i64
59+ %getelementptr9 = getelementptr i8 , ptr addrspace (1 ) %arg3 , i64 %zext8
60+ %load10 = load i8 , ptr addrspace (1 ) %getelementptr9 , align 1
61+ %or11 = or disjoint i32 %call , 2
62+ %zext12 = zext i32 %or11 to i64
63+ %getelementptr13 = getelementptr i8 , ptr addrspace (1 ) %arg3 , i64 %zext12
64+ %load14 = load i8 , ptr addrspace (1 ) %getelementptr13 , align 1
65+ br i1 %arg4 , label %bb23 , label %bb24
4166
42- entry:
43- %3 = tail call i32 @llvm.amdgcn.workitem.id.x ()
44- %idxprom = zext i32 %3 to i64
45- %arrayidx = getelementptr i8 , ptr addrspace (1 ) %input.coerce , i64 %idxprom
46- %4 = load i8 , ptr addrspace (1 ) %arrayidx , align 1
47- %add5.11 = or disjoint i32 %3 , 1
48- %idxprom.1 = zext i32 %add5.11 to i64
49- %arrayidx.1 = getelementptr i8 , ptr addrspace (1 ) %input.coerce , i64 %idxprom.1
50- %5 = load i8 , ptr addrspace (1 ) %arrayidx.1 , align 1
51- %add5.2 = or disjoint i32 %3 , 2
52- %idxprom.2 = zext i32 %add5.2 to i64
53- %arrayidx.2 = getelementptr i8 , ptr addrspace (1 ) %input.coerce , i64 %idxprom.2
54- %6 = load i8 , ptr addrspace (1 ) %arrayidx.2 , align 1
55- br i1 %cmp3.i , label %if.then.i , label %if.end.i
56-
57- if.then.i.i.i.i.i: ; preds = %if.end.i
58- %7 = zext i8 %6 to i32
59- %arrayidx7.i.i.i.i.i = getelementptr nusw [14 x i32 ], ptr addrspace (3 ) inttoptr (i32 84 to ptr addrspace (3 )), i32 0 , i32 %7
60- store i32 %0 , ptr addrspace (3 ) %arrayidx7.i.i.i.i.i , align 4
61- br label %func.exit.i.i.i
67+ bb15: ; preds = %bb24
68+ %zext16 = zext i8 %load14 to i32
69+ %getelementptr17 = getelementptr nusw [14 x i32 ], ptr addrspace (3 ) inttoptr (i32 84 to ptr addrspace (3 )), i32 0 , i32 %zext16
70+ store i32 %arg , ptr addrspace (3 ) %getelementptr17 , align 4
71+ br label %bb18
6272
63- func.exit.i.i.i: ; preds = %if.end.i , %if.then.i.i.i.i.i
64- %8 = zext i8 %5 to i32
65- %arrayidx7.i.i.1.i.i.i = getelementptr [14 x i32 ], ptr addrspace (3 ) %1 , i32 0 , i32 %8
66- store i32 0 , ptr addrspace (3 ) %arrayidx7.i.i.1.i.i.i , align 4
67- %9 = zext i8 %4 to i32
68- %arrayidx12.i = getelementptr [14 x i32 ], ptr addrspace (3 ) %2 , i32 0 , i32 %9
69- store i32 0 , ptr addrspace (3 ) %arrayidx12.i , align 4
70- store i32 0 , ptr addrspace (1 ) %input.coerce , align 4
73+ bb18: ; preds = %bb24 , %bb15
74+ %zext19 = zext i8 %load10 to i32
75+ %getelementptr20 = getelementptr [14 x i32 ], ptr addrspace (3 ) %arg6 , i32 0 , i32 %zext19
76+ store i32 0 , ptr addrspace (3 ) %getelementptr20 , align 4
77+ %zext21 = zext i8 %load to i32
78+ %getelementptr22 = getelementptr [14 x i32 ], ptr addrspace (3 ) %arg7 , i32 0 , i32 %zext21
79+ store i32 0 , ptr addrspace (3 ) %getelementptr22 , align 4
80+ store i32 0 , ptr addrspace (1 ) %arg3 , align 4
7181 ret void
7282
73- if.then.i : ; preds = %entry
83+ bb23 : ; preds = %bb
7484 store i32 0 , ptr addrspace (3 ) null , align 4
75- br label %if.end.i
85+ br label %bb24
7686
77- if.end.i : ; preds = %if.then.i , %entry
78- %cmp.not.i.i.i.i.not.i = icmp eq i8 %6 , 0
79- br i1 %cmp.not.i.i.i.i.not.i , label %func.exit.i.i.i , label %if.then.i.i.i.i.i
87+ bb24 : ; preds = %bb23 , %bb
88+ %icmp = icmp eq i8 %load14 , 0
89+ br i1 %icmp , label %bb18 , label %bb15
8090}
8191
8292; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
0 commit comments