7
7
define amdgpu_vs void @test (<4 x i32 > inreg %arg1 , <6 x float > addrspace (3 )* %arg2 ) {
8
8
; CHECK-LABEL: test:
9
9
; CHECK: ; %bb.0:
10
- ; CHECK-NEXT: s_mov_b32 s8, s4
11
- ; CHECK-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
12
- ; CHECK-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
13
- ; CHECK-NEXT: s_mov_b32 s6, -1
14
- ; CHECK-NEXT: s_mov_b32 s7, 0xe8f000
15
- ; CHECK-NEXT: s_add_u32 s4, s4, s8
16
- ; CHECK-NEXT: s_addc_u32 s5, s5, 0
10
+ ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 12, v0
17
11
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 8, v0
18
- ; CHECK-NEXT: v_add_i32_e32 v2 , vcc, 12 , v0
12
+ ; CHECK-NEXT: v_add_i32_e32 v4 , vcc, 4 , v0
19
13
; CHECK-NEXT: s_mov_b32 m0, -1
20
- ; CHECK-NEXT: ds_read_b32 v1, v1
21
- ; CHECK-NEXT: ds_read_b32 v2, v2
22
- ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 4, v0
14
+ ; CHECK-NEXT: ds_read_b32 v2, v1
15
+ ; CHECK-NEXT: ds_read_b32 v1, v4
23
16
; CHECK-NEXT: ds_read_b32 v3, v3
24
17
; CHECK-NEXT: ds_read_b32 v0, v0
25
- ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
26
- ; CHECK-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:28
27
- ; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0 offset:24
28
- ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
29
- ; CHECK-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:20
30
18
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
31
- ; CHECK-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:16
32
- ; CHECK-NEXT: s_waitcnt expcnt(1)
33
- ; CHECK-NEXT: buffer_load_dword v3, off, s[4:7], 0 offset:28
34
- ; CHECK-NEXT: buffer_load_dword v2, off, s[4:7], 0 offset:24
35
- ; CHECK-NEXT: buffer_load_dword v1, off, s[4:7], 0 offset:20
36
- ; CHECK-NEXT: s_waitcnt expcnt(0)
37
- ; CHECK-NEXT: buffer_load_dword v0, off, s[4:7], 0 offset:16
38
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
39
19
; CHECK-NEXT: exp mrt0 off, off, off, off
40
20
; CHECK-NEXT: v_mov_b32_e32 v4, 0
41
21
; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_FLOAT] idxen
@@ -50,42 +30,25 @@ define amdgpu_vs void @test(<4 x i32> inreg %arg1, <6 x float> addrspace(3)* %ar
50
30
define amdgpu_vs void @test_2 (<4 x i32 > inreg %arg1 , i32 %arg2 , i32 inreg %arg3 , <8 x float > addrspace (3 )* %arg4 ) {
51
31
; CHECK-LABEL: test_2:
52
32
; CHECK: ; %bb.0:
53
- ; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
54
- ; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
55
- ; CHECK-NEXT: s_mov_b32 s10, -1
56
- ; CHECK-NEXT: s_mov_b32 s11, 0xe8f000
57
- ; CHECK-NEXT: s_add_u32 s8, s8, s5
58
- ; CHECK-NEXT: s_addc_u32 s9, s9, 0
33
+ ; CHECK-NEXT: v_add_i32_e32 v5, vcc, 28, v1
59
34
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 24, v1
60
- ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 28 , v1
35
+ ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 20 , v1
61
36
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 16, v1
62
- ; CHECK-NEXT: v_add_i32_e32 v7, vcc, 20 , v1
37
+ ; CHECK-NEXT: v_add_i32_e32 v7, vcc, 12 , v1
63
38
; CHECK-NEXT: v_add_i32_e32 v8, vcc, 8, v1
64
- ; CHECK-NEXT: v_add_i32_e32 v9, vcc, 12, v1
65
39
; CHECK-NEXT: v_add_i32_e32 v10, vcc, 4, v1
66
40
; CHECK-NEXT: s_mov_b32 m0, -1
67
41
; CHECK-NEXT: ds_read_b32 v4, v2
68
- ; CHECK-NEXT: ds_read_b32 v5 , v3
42
+ ; CHECK-NEXT: ds_read_b32 v3 , v3
69
43
; CHECK-NEXT: ds_read_b32 v2, v6
70
- ; CHECK-NEXT: ds_read_b32 v3 , v7
44
+ ; CHECK-NEXT: ds_read_b32 v9 , v7
71
45
; CHECK-NEXT: ds_read_b32 v8, v8
72
- ; CHECK-NEXT: ds_read_b32 v9, v9
73
46
; CHECK-NEXT: ds_read_b32 v7, v10
74
47
; CHECK-NEXT: ds_read_b32 v6, v1
75
- ; CHECK-NEXT: s_waitcnt lgkmcnt(6)
76
- ; CHECK-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:28
77
- ; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:24
78
- ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
79
- ; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20
80
- ; CHECK-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:16
81
- ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
82
- ; CHECK-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:44
83
- ; CHECK-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:40
48
+ ; CHECK-NEXT: ds_read_b32 v5, v5
84
49
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
85
- ; CHECK-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:36
86
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
87
- ; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:32
88
50
; CHECK-NEXT: tbuffer_store_format_xyzw v[6:9], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen glc slc
51
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
89
52
; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc
90
53
; CHECK-NEXT: s_endpgm
91
54
%load = load <8 x float >, <8 x float > addrspace (3 )* %arg4 , align 4
@@ -99,65 +62,42 @@ define amdgpu_vs void @test_2(<4 x i32> inreg %arg1, i32 %arg2, i32 inreg %arg3,
99
62
define amdgpu_vs void @test_3 (i32 inreg %arg1 , i32 inreg %arg2 , <4 x i32 > inreg %arg3 , i32 %arg4 , <6 x float > addrspace (3 )* %arg5 , <6 x float > addrspace (3 )* %arg6 ) {
100
63
; CHECK-LABEL: test_3:
101
64
; CHECK: ; %bb.0:
102
- ; CHECK-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
103
- ; CHECK-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
104
- ; CHECK-NEXT: s_mov_b32 s10, -1
105
- ; CHECK-NEXT: s_mov_b32 s11, 0xe8f000
106
- ; CHECK-NEXT: s_add_u32 s8, s8, s6
107
- ; CHECK-NEXT: s_addc_u32 s9, s9, 0
108
65
; CHECK-NEXT: s_mov_b32 s7, s5
109
66
; CHECK-NEXT: s_mov_b32 s6, s4
110
67
; CHECK-NEXT: s_mov_b32 s5, s3
111
68
; CHECK-NEXT: s_mov_b32 s4, s2
112
- ; CHECK-NEXT: v_add_i32_e32 v0, vcc, 8 , v1
113
- ; CHECK-NEXT: v_add_i32_e32 v3 , vcc, 12, v1
114
- ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4 , v1
115
- ; CHECK-NEXT: v_add_i32_e32 v7, vcc, 16 , v1
69
+ ; CHECK-NEXT: v_add_i32_e32 v0, vcc, 16 , v1
70
+ ; CHECK-NEXT: v_add_i32_e32 v6 , vcc, 12, v1
71
+ ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 8 , v1
72
+ ; CHECK-NEXT: v_add_i32_e32 v7, vcc, 4 , v1
116
73
; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v1
117
74
; CHECK-NEXT: v_mov_b32_e32 v9, s0
118
- ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 8 , v2
75
+ ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 16 , v2
119
76
; CHECK-NEXT: v_add_i32_e32 v11, vcc, 12, v2
120
- ; CHECK-NEXT: v_add_i32_e32 v12, vcc, 4, v2
121
- ; CHECK-NEXT: v_add_i32_e32 v13, vcc, 16, v2
122
- ; CHECK-NEXT: v_add_i32_e32 v14, vcc, 20, v2
77
+ ; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2
123
78
; CHECK-NEXT: s_mov_b32 m0, -1
124
- ; CHECK-NEXT: ds_read_b32 v5, v0
125
- ; CHECK-NEXT: ds_read_b32 v6, v3
126
- ; CHECK-NEXT: ds_read_b32 v4, v4
127
- ; CHECK-NEXT: ds_read_b32 v8, v8
128
- ; CHECK-NEXT: ds_read_b32 v7, v7
129
79
; CHECK-NEXT: ds_read_b32 v3, v1
130
- ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
131
- ; CHECK-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:44
132
- ; CHECK-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:40
133
- ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
134
- ; CHECK-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:36
135
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
136
- ; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:32
80
+ ; CHECK-NEXT: ds_read_b32 v5, v4
81
+ ; CHECK-NEXT: ds_read_b32 v4, v7
82
+ ; CHECK-NEXT: ds_read_b32 v1, v8
83
+ ; CHECK-NEXT: ds_read_b32 v6, v6
84
+ ; CHECK-NEXT: ds_read_b32 v0, v0
85
+ ; CHECK-NEXT: v_add_i32_e32 v7, vcc, 4, v2
86
+ ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v2
87
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
137
88
; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc
138
- ; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc
139
- ; CHECK-NEXT: ds_read_b32 v0, v10
140
- ; CHECK-NEXT: ds_read_b32 v1, v11
141
- ; CHECK-NEXT: s_waitcnt expcnt(1)
142
- ; CHECK-NEXT: ds_read_b32 v3, v12
143
- ; CHECK-NEXT: ds_read_b32 v4, v13
144
- ; CHECK-NEXT: ds_read_b32 v2, v2
145
- ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
146
- ; CHECK-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:28
147
- ; CHECK-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:24
148
- ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
149
- ; CHECK-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:20
150
89
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
151
- ; CHECK-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:16
152
- ; CHECK-NEXT: s_waitcnt expcnt(1)
153
- ; CHECK-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:28
90
+ ; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc
154
91
; CHECK-NEXT: s_waitcnt expcnt(0)
155
- ; CHECK-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:24
156
- ; CHECK-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:20
157
- ; CHECK-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:16
158
- ; CHECK-NEXT: ds_read_b32 v5, v14
159
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
92
+ ; CHECK-NEXT: ds_read_b32 v0, v2
93
+ ; CHECK-NEXT: ds_read_b32 v2, v12
94
+ ; CHECK-NEXT: ds_read_b32 v1, v7
95
+ ; CHECK-NEXT: ds_read_b32 v5, v8
96
+ ; CHECK-NEXT: ds_read_b32 v3, v11
97
+ ; CHECK-NEXT: ds_read_b32 v4, v10
98
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(5)
160
99
; CHECK-NEXT: exp mrt0 off, off, off, off
100
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
161
101
; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc
162
102
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
163
103
; CHECK-NEXT: tbuffer_store_format_xy v[4:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
0 commit comments