3
3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
4
4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
5
5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
6
+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
7
+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
6
8
7
9
define i8 @atomic_load_monotonic_i8 (ptr addrspace (3 ) %ptr ) {
8
10
; CI-LABEL: atomic_load_monotonic_i8:
@@ -33,6 +35,14 @@ define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
33
35
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0
34
36
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
35
37
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
38
+ ;
39
+ ; GFX1250-LABEL: atomic_load_monotonic_i8:
40
+ ; GFX1250: ; %bb.0:
41
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
42
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
43
+ ; GFX1250-NEXT: ds_load_u8 v0, v0
44
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
45
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
36
46
%load = load atomic i8 , ptr addrspace (3 ) %ptr monotonic , align 1
37
47
ret i8 %load
38
48
}
@@ -66,6 +76,14 @@ define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
66
76
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0 offset:16
67
77
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
68
78
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
79
+ ;
80
+ ; GFX1250-LABEL: atomic_load_monotonic_i8_offset:
81
+ ; GFX1250: ; %bb.0:
82
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
83
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
84
+ ; GFX1250-NEXT: ds_load_u8 v0, v0 offset:16
85
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
86
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
69
87
%gep = getelementptr inbounds i8 , ptr addrspace (3 ) %ptr , i8 16
70
88
%load = load atomic i8 , ptr addrspace (3 ) %gep monotonic , align 1
71
89
ret i8 %load
@@ -100,6 +118,14 @@ define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
100
118
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
101
119
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
102
120
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
121
+ ;
122
+ ; GFX1250-LABEL: atomic_load_monotonic_i16:
123
+ ; GFX1250: ; %bb.0:
124
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
125
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
126
+ ; GFX1250-NEXT: ds_load_u16 v0, v0
127
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
128
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
103
129
%load = load atomic i16 , ptr addrspace (3 ) %ptr monotonic , align 2
104
130
ret i16 %load
105
131
}
@@ -133,6 +159,14 @@ define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
133
159
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
134
160
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
135
161
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
162
+ ;
163
+ ; GFX1250-LABEL: atomic_load_monotonic_i16_offset:
164
+ ; GFX1250: ; %bb.0:
165
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
166
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
167
+ ; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
168
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
169
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
136
170
%gep = getelementptr inbounds i16 , ptr addrspace (3 ) %ptr , i16 16
137
171
%load = load atomic i16 , ptr addrspace (3 ) %gep monotonic , align 2
138
172
ret i16 %load
@@ -160,6 +194,14 @@ define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
160
194
; GFX11-NEXT: ds_load_b32 v0, v0
161
195
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
162
196
; GFX11-NEXT: s_setpc_b64 s[30:31]
197
+ ;
198
+ ; GFX1250-LABEL: atomic_load_monotonic_i32:
199
+ ; GFX1250: ; %bb.0:
200
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
201
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
202
+ ; GFX1250-NEXT: ds_load_b32 v0, v0
203
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
204
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
163
205
%load = load atomic i32 , ptr addrspace (3 ) %ptr monotonic , align 4
164
206
ret i32 %load
165
207
}
@@ -186,6 +228,14 @@ define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
186
228
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
187
229
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
188
230
; GFX11-NEXT: s_setpc_b64 s[30:31]
231
+ ;
232
+ ; GFX1250-LABEL: atomic_load_monotonic_i32_offset:
233
+ ; GFX1250: ; %bb.0:
234
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
235
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
236
+ ; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
237
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
238
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
189
239
%gep = getelementptr inbounds i32 , ptr addrspace (3 ) %ptr , i32 16
190
240
%load = load atomic i32 , ptr addrspace (3 ) %gep monotonic , align 4
191
241
ret i32 %load
@@ -213,6 +263,14 @@ define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
213
263
; GFX11-NEXT: ds_load_b64 v[0:1], v0
214
264
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
215
265
; GFX11-NEXT: s_setpc_b64 s[30:31]
266
+ ;
267
+ ; GFX1250-LABEL: atomic_load_monotonic_i64:
268
+ ; GFX1250: ; %bb.0:
269
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
270
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
271
+ ; GFX1250-NEXT: ds_load_b64 v[0:1], v0
272
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
273
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
216
274
%load = load atomic i64 , ptr addrspace (3 ) %ptr monotonic , align 8
217
275
ret i64 %load
218
276
}
@@ -239,6 +297,14 @@ define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
239
297
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
240
298
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
241
299
; GFX11-NEXT: s_setpc_b64 s[30:31]
300
+ ;
301
+ ; GFX1250-LABEL: atomic_load_monotonic_i64_offset:
302
+ ; GFX1250: ; %bb.0:
303
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
304
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
305
+ ; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
306
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
307
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
242
308
%gep = getelementptr inbounds i64 , ptr addrspace (3 ) %ptr , i32 16
243
309
%load = load atomic i64 , ptr addrspace (3 ) %gep monotonic , align 8
244
310
ret i64 %load
@@ -266,6 +332,14 @@ define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
266
332
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
267
333
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
268
334
; GFX11-NEXT: s_setpc_b64 s[30:31]
335
+ ;
336
+ ; GFX1250-LABEL: atomic_load_monotonic_f32_offset:
337
+ ; GFX1250: ; %bb.0:
338
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
339
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
340
+ ; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
341
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
342
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
269
343
%gep = getelementptr inbounds float , ptr addrspace (3 ) %ptr , i32 16
270
344
%load = load atomic float , ptr addrspace (3 ) %gep monotonic , align 4
271
345
ret float %load
@@ -293,6 +367,14 @@ define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
293
367
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
294
368
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
295
369
; GFX11-NEXT: s_setpc_b64 s[30:31]
370
+ ;
371
+ ; GFX1250-LABEL: atomic_load_monotonic_f64_offset:
372
+ ; GFX1250: ; %bb.0:
373
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
374
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
375
+ ; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
376
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
377
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
296
378
%gep = getelementptr inbounds double , ptr addrspace (3 ) %ptr , i32 16
297
379
%load = load atomic double , ptr addrspace (3 ) %gep monotonic , align 8
298
380
ret double %load
@@ -320,6 +402,14 @@ define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
320
402
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
321
403
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
322
404
; GFX11-NEXT: s_setpc_b64 s[30:31]
405
+ ;
406
+ ; GFX1250-LABEL: atomic_load_monotonic_p0i8_offset:
407
+ ; GFX1250: ; %bb.0:
408
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
409
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
410
+ ; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
411
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
412
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
323
413
%gep = getelementptr inbounds ptr , ptr addrspace (3 ) %ptr , i32 16
324
414
%load = load atomic ptr , ptr addrspace (3 ) %gep monotonic , align 8
325
415
ret ptr %load
@@ -347,6 +437,14 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr
347
437
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
348
438
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
349
439
; GFX11-NEXT: s_setpc_b64 s[30:31]
440
+ ;
441
+ ; GFX1250-LABEL: atomic_load_monotonic_p3i8_offset:
442
+ ; GFX1250: ; %bb.0:
443
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
444
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
445
+ ; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
446
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
447
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
350
448
%gep = getelementptr inbounds ptr addrspace (3 ), ptr addrspace (3 ) %ptr , i32 16
351
449
%load = load atomic ptr addrspace (3 ), ptr addrspace (3 ) %gep monotonic , align 4
352
450
ret ptr addrspace (3 ) %load
@@ -381,6 +479,14 @@ define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
381
479
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
382
480
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
383
481
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
482
+ ;
483
+ ; GFX1250-LABEL: atomic_load_monotonic_f16:
484
+ ; GFX1250: ; %bb.0:
485
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
486
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
487
+ ; GFX1250-NEXT: ds_load_u16 v0, v0
488
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
489
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
384
490
%load = load atomic half , ptr addrspace (3 ) %ptr monotonic , align 2
385
491
%ret = bitcast half %load to i16
386
492
ret i16 %ret
@@ -415,6 +521,14 @@ define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
415
521
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
416
522
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
417
523
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
524
+ ;
525
+ ; GFX1250-LABEL: atomic_load_monotonic_f16_offset:
526
+ ; GFX1250: ; %bb.0:
527
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
528
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
529
+ ; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
530
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
531
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
418
532
%gep = getelementptr inbounds half , ptr addrspace (3 ) %ptr , i32 16
419
533
%load = load atomic half , ptr addrspace (3 ) %gep monotonic , align 2
420
534
%ret = bitcast half %load to i16
@@ -450,6 +564,14 @@ define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
450
564
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
451
565
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
452
566
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
567
+ ;
568
+ ; GFX1250-LABEL: atomic_load_monotonic_bf16:
569
+ ; GFX1250: ; %bb.0:
570
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
571
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
572
+ ; GFX1250-NEXT: ds_load_u16 v0, v0
573
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
574
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
453
575
%load = load atomic bfloat, ptr addrspace (3 ) %ptr monotonic , align 2
454
576
%ret = bitcast bfloat %load to i16
455
577
ret i16 %ret
@@ -484,10 +606,20 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
484
606
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
485
607
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
486
608
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
609
+ ;
610
+ ; GFX1250-LABEL: atomic_load_monotonic_bf16_offset:
611
+ ; GFX1250: ; %bb.0:
612
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
613
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
614
+ ; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
615
+ ; GFX1250-NEXT: s_wait_dscnt 0x0
616
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
487
617
%gep = getelementptr inbounds bfloat, ptr addrspace (3 ) %ptr , i32 16
488
618
%load = load atomic bfloat, ptr addrspace (3 ) %gep monotonic , align 2
489
619
%ret = bitcast bfloat %load to i16
490
620
ret i16 %ret
491
621
}
492
622
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
493
623
; GCN: {{.*}}
624
+ ; GFX1250-FAKE16: {{.*}}
625
+ ; GFX1250-TRUE16: {{.*}}
0 commit comments