Skip to content

Commit 6be252b

Browse files
committed
fixup! Use named values and generated checks in sdwa-peephole-instr-combine-sel.ll
1 parent bdda1de commit 6be252b

File tree

1 file changed

+49
-57
lines changed

1 file changed

+49
-57
lines changed

llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll

Lines changed: 49 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,84 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - < %s | FileCheck -check-prefix=CHECK %s
3-
; ModuleID = 'llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll'
4-
source_filename = "llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll"
53

6-
define amdgpu_kernel void @bar(ptr addrspace(1) %arg3, i32 %arg, i1 %arg4, i32 %arg5, ptr addrspace(3) %arg6, ptr addrspace(3) %arg7) {
7-
; CHECK-LABEL: bar:
4+
; The si-peephole-sdwa pass previously mishandled the operand selections (dst_sel/src0_sel/src1_sel) of pre-existing SDWA instructions,
5+
; which led to an incorrect instruction of this shape:
6+
; v_lshlrev_b32_sdwa v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
7+
; instead of the expected instruction (note src1_sel:WORD_0 rather than WORD_1):
8+
; v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
9+
10+
define amdgpu_kernel void @widget(ptr addrspace(1) %arg, i1 %arg1, ptr addrspace(3) %arg2, ptr addrspace(3) %arg3) {
11+
; CHECK-LABEL: widget:
812
; CHECK: ; %bb.0: ; %bb
9-
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
13+
; CHECK-NEXT: s_clause 0x1
14+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
15+
; CHECK-NEXT: s_load_dword s2, s[8:9], 0x8
1016
; CHECK-NEXT: v_mov_b32_e32 v2, 8
1117
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1218
; CHECK-NEXT: s_clause 0x1
1319
; CHECK-NEXT: global_load_ushort v1, v0, s[0:1]
1420
; CHECK-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2
15-
; CHECK-NEXT: s_bitcmp1_b32 s3, 0
16-
; CHECK-NEXT: s_cselect_b32 s3, -1, 0
17-
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s3
21+
; CHECK-NEXT: s_bitcmp1_b32 s2, 0
22+
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
23+
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s0
1824
; CHECK-NEXT: s_waitcnt vmcnt(1)
1925
; CHECK-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2026
; CHECK-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2127
; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
2228
; CHECK-NEXT: s_waitcnt vmcnt(0)
2329
; CHECK-NEXT: v_lshl_or_b32 v0, v0, 16, v1
2430
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
25-
; CHECK-NEXT: ; %bb.1: ; %bb23
31+
; CHECK-NEXT: ; %bb.1: ; %bb19
2632
; CHECK-NEXT: v_mov_b32_e32 v1, 0
2733
; CHECK-NEXT: ds_write_b32 v1, v1
28-
; CHECK-NEXT: .LBB0_2: ; %bb24
29-
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
30-
; CHECK-NEXT: s_mov_b32 s3, exec_lo
31-
; CHECK-NEXT: v_cmpx_ne_u16_e32 0, v1
32-
; CHECK-NEXT: s_xor_b32 s3, exec_lo, s3
34+
; CHECK-NEXT: .LBB0_2: ; %bb20
35+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0
36+
; CHECK-NEXT: s_mov_b32 s0, exec_lo
37+
; CHECK-NEXT: v_cmpx_ne_u16_e32 0, v0
38+
; CHECK-NEXT: s_xor_b32 s0, exec_lo, s0
3339
; CHECK-NEXT: s_cbranch_execz .LBB0_4
34-
; CHECK-NEXT: ; %bb.3: ; %bb15
35-
; CHECK-NEXT: v_mov_b32_e32 v2, 2
36-
; CHECK-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
37-
; CHECK-NEXT: v_mov_b32_e32 v2, s2
38-
; CHECK-NEXT: ds_write_b32 v1, v2 offset:84
39-
; CHECK-NEXT: .LBB0_4: ; %bb18
40-
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s3
41-
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x14
42-
; CHECK-NEXT: v_bfe_u32 v1, v0, 8, 8
43-
; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0
44-
; CHECK-NEXT: v_mov_b32_e32 v2, 0
45-
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
46-
; CHECK-NEXT: v_lshl_add_u32 v1, v1, 2, s2
47-
; CHECK-NEXT: v_lshl_add_u32 v0, v0, 2, s3
48-
; CHECK-NEXT: ds_write_b32 v1, v2
49-
; CHECK-NEXT: ds_write_b32 v0, v2
50-
; CHECK-NEXT: global_store_dword v2, v2, s[0:1]
40+
; CHECK-NEXT: ; %bb.3: ; %bb11
41+
; CHECK-NEXT: v_mov_b32_e32 v1, 2
42+
; CHECK-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
43+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
44+
; CHECK-NEXT: ds_write_b32 v0, v1 offset:84
45+
; CHECK-NEXT: .LBB0_4: ; %bb14
5146
; CHECK-NEXT: s_endpgm
5247
bb:
5348
%call = tail call i32 @llvm.amdgcn.workitem.id.x()
5449
%zext = zext i32 %call to i64
55-
%getelementptr = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext
50+
%getelementptr = getelementptr i8, ptr addrspace(1) %arg, i64 %zext
5651
%load = load i8, ptr addrspace(1) %getelementptr, align 1
5752
%or = or disjoint i32 %call, 1
58-
%zext8 = zext i32 %or to i64
59-
%getelementptr9 = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext8
53+
%zext4 = zext i32 %or to i64
54+
%getelementptr5 = getelementptr i8, ptr addrspace(1) %arg, i64 %zext4
55+
%load6 = load i8, ptr addrspace(1) %getelementptr5, align 1
56+
%or7 = or disjoint i32 %call, 2
57+
%zext8 = zext i32 %or7 to i64
58+
%getelementptr9 = getelementptr i8, ptr addrspace(1) %arg, i64 %zext8
6059
%load10 = load i8, ptr addrspace(1) %getelementptr9, align 1
61-
%or11 = or disjoint i32 %call, 2
62-
%zext12 = zext i32 %or11 to i64
63-
%getelementptr13 = getelementptr i8, ptr addrspace(1) %arg3, i64 %zext12
64-
%load14 = load i8, ptr addrspace(1) %getelementptr13, align 1
65-
br i1 %arg4, label %bb23, label %bb24
60+
br i1 %arg1, label %bb19, label %bb20
6661

67-
bb15: ; preds = %bb24
68-
%zext16 = zext i8 %load14 to i32
69-
%getelementptr17 = getelementptr nusw [14 x i32], ptr addrspace(3) inttoptr (i32 84 to ptr addrspace(3)), i32 0, i32 %zext16
70-
store i32 %arg, ptr addrspace(3) %getelementptr17, align 4
71-
br label %bb18
62+
bb11: ; preds = %bb20
63+
%zext12 = zext i8 %load10 to i64
64+
%getelementptr13 = getelementptr nusw [14 x i32], ptr addrspace(3) inttoptr (i32 84 to ptr addrspace(3)), i64 0, i64 %zext12
65+
store i32 0, ptr addrspace(3) %getelementptr13, align 4
66+
br label %bb14
7267

73-
bb18: ; preds = %bb24, %bb15
74-
%zext19 = zext i8 %load10 to i32
75-
%getelementptr20 = getelementptr [14 x i32], ptr addrspace(3) %arg6, i32 0, i32 %zext19
76-
store i32 0, ptr addrspace(3) %getelementptr20, align 4
77-
%zext21 = zext i8 %load to i32
78-
%getelementptr22 = getelementptr [14 x i32], ptr addrspace(3) %arg7, i32 0, i32 %zext21
79-
store i32 0, ptr addrspace(3) %getelementptr22, align 4
80-
store i32 0, ptr addrspace(1) %arg3, align 4
68+
bb14: ; preds = %bb20, %bb11
69+
%zext15 = zext i8 %load6 to i64
70+
%getelementptr16 = getelementptr [14 x i32], ptr addrspace(3) %arg2, i64 0, i64 %zext15
71+
%zext17 = zext i8 %load to i64
72+
%getelementptr18 = getelementptr [14 x i32], ptr addrspace(3) %arg3, i64 0, i64 %zext17
8173
ret void
8274

83-
bb23: ; preds = %bb
75+
bb19: ; preds = %bb
8476
store i32 0, ptr addrspace(3) null, align 4
85-
br label %bb24
77+
br label %bb20
8678

87-
bb24: ; preds = %bb23, %bb
88-
%icmp = icmp eq i8 %load14, 0
89-
br i1 %icmp, label %bb18, label %bb15
79+
bb20: ; preds = %bb19, %bb
80+
%icmp = icmp eq i8 %load10, 0
81+
br i1 %icmp, label %bb14, label %bb11
9082
}
9183

9284
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)

0 commit comments

Comments
 (0)