; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
56
67define i8 @atomic_load_global_monotonic_i8 (ptr addrspace (1 ) %ptr ) {
8+ ; GFX6-LABEL: atomic_load_global_monotonic_i8:
9+ ; GFX6: ; %bb.0:
10+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+ ; GFX6-NEXT: s_mov_b32 s6, 0
12+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
13+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
14+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
15+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
16+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
17+ ;
718; GFX7-LABEL: atomic_load_global_monotonic_i8:
819; GFX7: ; %bb.0:
920; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,6 +40,16 @@ define i8 @atomic_load_global_monotonic_i8(ptr addrspace(1) %ptr) {
2940}
3041
3142define i32 @atomic_load_global_monotonic_i8_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
43+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
44+ ; GFX6: ; %bb.0:
45+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46+ ; GFX6-NEXT: s_mov_b32 s6, 0
47+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
48+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
49+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
50+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
51+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
52+ ;
3253; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
3354; GFX7: ; %bb.0:
3455; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -55,6 +76,18 @@ define i32 @atomic_load_global_monotonic_i8_zext_to_i32(ptr addrspace(1) %ptr) {
5576}
5677
5778define i32 @atomic_load_global_monotonic_i8_sext_to_i32 (ptr addrspace (1 ) %ptr ) {
79+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
80+ ; GFX6: ; %bb.0:
81+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82+ ; GFX6-NEXT: s_mov_b32 s6, 0
83+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
84+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
85+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
86+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
87+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
88+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
89+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
90+ ;
5891; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
5992; GFX7: ; %bb.0:
6093; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -88,6 +121,16 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
88121}
89122
90123define i16 @atomic_load_global_monotonic_i8_zext_to_i16 (ptr addrspace (1 ) %ptr ) {
124+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
125+ ; GFX6: ; %bb.0:
126+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+ ; GFX6-NEXT: s_mov_b32 s6, 0
128+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
129+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
130+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
131+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
132+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
133+ ;
91134; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
92135; GFX7: ; %bb.0:
93136; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -114,6 +157,18 @@ define i16 @atomic_load_global_monotonic_i8_zext_to_i16(ptr addrspace(1) %ptr) {
114157}
115158
116159define i16 @atomic_load_global_monotonic_i8_sext_to_i16 (ptr addrspace (1 ) %ptr ) {
160+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
161+ ; GFX6: ; %bb.0:
162+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163+ ; GFX6-NEXT: s_mov_b32 s6, 0
164+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
165+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
166+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
167+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
168+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
169+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
170+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
171+ ;
117172; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
118173; GFX7: ; %bb.0:
119174; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -147,6 +202,16 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
147202}
148203
149204define i16 @atomic_load_global_monotonic_i16 (ptr addrspace (1 ) %ptr ) {
205+ ; GFX6-LABEL: atomic_load_global_monotonic_i16:
206+ ; GFX6: ; %bb.0:
207+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208+ ; GFX6-NEXT: s_mov_b32 s6, 0
209+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
210+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
211+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
212+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
213+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
214+ ;
150215; GFX7-LABEL: atomic_load_global_monotonic_i16:
151216; GFX7: ; %bb.0:
152217; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -172,6 +237,16 @@ define i16 @atomic_load_global_monotonic_i16(ptr addrspace(1) %ptr) {
172237}
173238
174239define i32 @atomic_load_global_monotonic_i16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
240+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
241+ ; GFX6: ; %bb.0:
242+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243+ ; GFX6-NEXT: s_mov_b32 s6, 0
244+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
245+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
246+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
247+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
248+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
249+ ;
175250; GFX7-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
176251; GFX7: ; %bb.0:
177252; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +273,18 @@ define i32 @atomic_load_global_monotonic_i16_zext_to_i32(ptr addrspace(1) %ptr)
198273}
199274
200275define i32 @atomic_load_global_monotonic_i16_sext_to_i32 (ptr addrspace (1 ) %ptr ) {
276+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
277+ ; GFX6: ; %bb.0:
278+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279+ ; GFX6-NEXT: s_mov_b32 s6, 0
280+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
281+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
282+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
283+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
284+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
285+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
286+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
287+ ;
201288; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
202289; GFX7: ; %bb.0:
203290; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,6 +318,16 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr)
231318}
232319
233320define half @atomic_load_global_monotonic_f16 (ptr addrspace (1 ) %ptr ) {
321+ ; GFX6-LABEL: atomic_load_global_monotonic_f16:
322+ ; GFX6: ; %bb.0:
323+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324+ ; GFX6-NEXT: s_mov_b32 s6, 0
325+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
326+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
327+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
328+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
329+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
330+ ;
234331; GFX7-LABEL: atomic_load_global_monotonic_f16:
235332; GFX7: ; %bb.0:
236333; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -256,6 +353,16 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
256353}
257354
258355define bfloat @atomic_load_global_monotonic_bf16 (ptr addrspace (1 ) %ptr ) {
356+ ; GFX6-LABEL: atomic_load_global_monotonic_bf16:
357+ ; GFX6: ; %bb.0:
358+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359+ ; GFX6-NEXT: s_mov_b32 s6, 0
360+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
361+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
362+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
363+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
364+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
365+ ;
259366; GFX7-LABEL: atomic_load_global_monotonic_bf16:
260367; GFX7: ; %bb.0:
261368; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,6 +388,16 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
281388}
282389
283390define i32 @atomic_load_global_monotonic_f16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
391+ ; GFX6-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
392+ ; GFX6: ; %bb.0:
393+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394+ ; GFX6-NEXT: s_mov_b32 s6, 0
395+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
396+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
397+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
398+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
399+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
400+ ;
284401; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
285402; GFX7: ; %bb.0:
286403; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -308,6 +425,16 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr)
308425}
309426
310427define i32 @atomic_load_global_monotonic_bf16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
428+ ; GFX6-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
429+ ; GFX6: ; %bb.0:
430+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431+ ; GFX6-NEXT: s_mov_b32 s6, 0
432+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
433+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
434+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
435+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
436+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
437+ ;
311438; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
312439; GFX7: ; %bb.0:
313440; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -335,6 +462,17 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr)
335462}
336463
337464define i32 @atomic_load_global_monotonic_i16_d16_hi_shift (ptr addrspace (1 ) %ptr ) {
465+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
466+ ; GFX6: ; %bb.0:
467+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468+ ; GFX6-NEXT: s_mov_b32 s6, 0
469+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
470+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
471+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
472+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
473+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
474+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
475+ ;
338476; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
339477; GFX7: ; %bb.0:
340478; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -365,6 +503,23 @@ define i32 @atomic_load_global_monotonic_i16_d16_hi_shift(ptr addrspace(1) %ptr)
365503}
366504
367505define <2 x i16 > @atomic_load_global_monotonic_i16_d16_hi_vector_insert (ptr addrspace (1 ) %ptr , <2 x i16 > %vec ) {
506+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
507+ ; GFX6: ; %bb.0:
508+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509+ ; GFX6-NEXT: s_mov_b32 s6, 0
510+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
511+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
512+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
513+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
514+ ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
515+ ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
516+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
517+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
518+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
519+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
520+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
521+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
522+ ;
368523; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
369524; GFX7: ; %bb.0:
370525; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -403,6 +558,19 @@ define <2 x i16> @atomic_load_global_monotonic_i16_d16_hi_vector_insert(ptr addr
403558}
404559
405560define i32 @atomic_load_global_monotonic_i16_d16_lo_or (ptr addrspace (1 ) %ptr , i16 %high ) {
561+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
562+ ; GFX6: ; %bb.0:
563+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564+ ; GFX6-NEXT: s_mov_b32 s6, 0
565+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
566+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
567+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
568+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
569+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
570+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
571+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
572+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
573+ ;
406574; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
407575; GFX7: ; %bb.0:
408576; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -440,6 +608,22 @@ define i32 @atomic_load_global_monotonic_i16_d16_lo_or(ptr addrspace(1) %ptr, i1
440608}
441609
442610define <2 x i16 > @atomic_load_global_monotonic_i16_d16_lo_vector_insert (ptr addrspace (1 ) %ptr , <2 x i16 > %vec ) {
611+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert:
612+ ; GFX6: ; %bb.0:
613+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614+ ; GFX6-NEXT: s_mov_b32 s6, 0
615+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
616+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
617+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
618+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
619+ ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
620+ ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
621+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
622+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
623+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
624+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
625+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
626+ ;
443627; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert:
444628; GFX7: ; %bb.0:
445629; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments