Skip to content

Commit c37b02a

Browse files
committed
update ll tests
1 parent 1563cfe commit c37b02a

File tree

171 files changed

+38623
-34785
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+38623
-34785
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
5252
; GFX9-LABEL: v_add_v2i16_fneg_lhs:
5353
; GFX9: ; %bb.0:
5454
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
55+
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
56+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
5657
; GFX9-NEXT: s_setpc_b64 s[30:31]
5758
;
5859
; GFX8-LABEL: v_add_v2i16_fneg_lhs:
@@ -67,7 +68,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
6768
; GFX10-LABEL: v_add_v2i16_fneg_lhs:
6869
; GFX10: ; %bb.0:
6970
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
71+
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
72+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
7173
; GFX10-NEXT: s_setpc_b64 s[30:31]
7274
%neg.a = fneg <2 x half> %a
7375
%cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
@@ -91,7 +93,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
9193
; GFX9-LABEL: v_add_v2i16_fneg_rhs:
9294
; GFX9: ; %bb.0:
9395
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
96+
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
97+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
9598
; GFX9-NEXT: s_setpc_b64 s[30:31]
9699
;
97100
; GFX8-LABEL: v_add_v2i16_fneg_rhs:
@@ -106,7 +109,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
106109
; GFX10-LABEL: v_add_v2i16_fneg_rhs:
107110
; GFX10: ; %bb.0:
108111
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
112+
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
113+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
110114
; GFX10-NEXT: s_setpc_b64 s[30:31]
111115
%neg.b = fneg <2 x half> %b
112116
%cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
@@ -135,7 +139,9 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
135139
; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
136140
; GFX9: ; %bb.0:
137141
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
142+
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
143+
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
144+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
139145
; GFX9-NEXT: s_setpc_b64 s[30:31]
140146
;
141147
; GFX8-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
@@ -151,7 +157,9 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
151157
; GFX10-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
152158
; GFX10: ; %bb.0:
153159
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
160+
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
161+
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
162+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
155163
; GFX10-NEXT: s_setpc_b64 s[30:31]
156164
%neg.a = fneg <2 x half> %a
157165
%neg.b = fneg <2 x half> %b

llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
; Tests for add.
77
; CHECK: name: addi32
8-
; CHECK: {{%[0-9]+}}:_(s32) = G_ADD
8+
; CHECK: {{%[0-9]+}}:_(i32) = G_ADD
99
define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) {
1010
%res = add i32 %arg1, %arg2
1111
store i32 %res, ptr addrspace(1) undef

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,7 @@ define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
695695
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
696696
; GFX6-NEXT: s_sext_i32_i16 s0, s0
697697
; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
698+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
698699
; GFX6-NEXT: ; return to shader part epilog
699700
;
700701
; GFX8-LABEL: ashr_i16_sv:
@@ -722,6 +723,7 @@ define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
722723
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
723724
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
724725
; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
726+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
725727
; GFX6-NEXT: ; return to shader part epilog
726728
;
727729
; GFX8-LABEL: ashr_i16_vs:

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,23 +98,53 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
9898
}
9999

100100
define half @atomic_load_flat_monotonic_f16(ptr %ptr) {
101-
; GCN-LABEL: atomic_load_flat_monotonic_f16:
102-
; GCN: ; %bb.0:
103-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
105-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106-
; GCN-NEXT: s_setpc_b64 s[30:31]
101+
; GFX7-LABEL: atomic_load_flat_monotonic_f16:
102+
; GFX7: ; %bb.0:
103+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
105+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
107+
; GFX7-NEXT: s_setpc_b64 s[30:31]
108+
;
109+
; GFX8-LABEL: atomic_load_flat_monotonic_f16:
110+
; GFX8: ; %bb.0:
111+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
113+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
;
116+
; GFX9-LABEL: atomic_load_flat_monotonic_f16:
117+
; GFX9: ; %bb.0:
118+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
120+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
121+
; GFX9-NEXT: s_setpc_b64 s[30:31]
107122
%load = load atomic half, ptr %ptr monotonic, align 2
108123
ret half %load
109124
}
110125

111126
define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) {
112-
; GCN-LABEL: atomic_load_flat_monotonic_bf16:
113-
; GCN: ; %bb.0:
114-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
116-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
117-
; GCN-NEXT: s_setpc_b64 s[30:31]
127+
; GFX7-LABEL: atomic_load_flat_monotonic_bf16:
128+
; GFX7: ; %bb.0:
129+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
131+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
132+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
133+
; GFX7-NEXT: s_setpc_b64 s[30:31]
134+
;
135+
; GFX8-LABEL: atomic_load_flat_monotonic_bf16:
136+
; GFX8: ; %bb.0:
137+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
139+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
140+
; GFX8-NEXT: s_setpc_b64 s[30:31]
141+
;
142+
; GFX9-LABEL: atomic_load_flat_monotonic_bf16:
143+
; GFX9: ; %bb.0:
144+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
146+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
147+
; GFX9-NEXT: s_setpc_b64 s[30:31]
118148
%load = load atomic bfloat, ptr %ptr monotonic, align 2
119149
ret bfloat %load
120150
}
@@ -125,6 +155,7 @@ define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
125155
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126156
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
127157
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
158+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
128159
; GCN-NEXT: s_setpc_b64 s[30:31]
129160
%load = load atomic half, ptr %ptr monotonic, align 2
130161
%cast = bitcast half %load to i16
@@ -138,6 +169,7 @@ define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
138169
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139170
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
140171
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
172+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
141173
; GCN-NEXT: s_setpc_b64 s[30:31]
142174
%load = load atomic bfloat, ptr %ptr monotonic, align 2
143175
%cast = bitcast bfloat %load to i16

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,15 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
299299
; GFX6-NEXT: s_mov_b64 s[4:5], 0
300300
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
301301
; GFX6-NEXT: s_waitcnt vmcnt(0)
302+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
302303
; GFX6-NEXT: s_setpc_b64 s[30:31]
303304
;
304305
; GFX7-LABEL: atomic_load_global_monotonic_f16:
305306
; GFX7: ; %bb.0:
306307
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307308
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
308309
; GFX7-NEXT: s_waitcnt vmcnt(0)
310+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
309311
; GFX7-NEXT: s_setpc_b64 s[30:31]
310312
;
311313
; GFX8-LABEL: atomic_load_global_monotonic_f16:
@@ -334,13 +336,15 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
334336
; GFX6-NEXT: s_mov_b64 s[4:5], 0
335337
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
336338
; GFX6-NEXT: s_waitcnt vmcnt(0)
339+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
337340
; GFX6-NEXT: s_setpc_b64 s[30:31]
338341
;
339342
; GFX7-LABEL: atomic_load_global_monotonic_bf16:
340343
; GFX7: ; %bb.0:
341344
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342345
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
343346
; GFX7-NEXT: s_waitcnt vmcnt(0)
347+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
344348
; GFX7-NEXT: s_setpc_b64 s[30:31]
345349
;
346350
; GFX8-LABEL: atomic_load_global_monotonic_bf16:
@@ -369,27 +373,31 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr)
369373
; GFX6-NEXT: s_mov_b64 s[4:5], 0
370374
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
371375
; GFX6-NEXT: s_waitcnt vmcnt(0)
376+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
372377
; GFX6-NEXT: s_setpc_b64 s[30:31]
373378
;
374379
; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
375380
; GFX7: ; %bb.0:
376381
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377382
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
378383
; GFX7-NEXT: s_waitcnt vmcnt(0)
384+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
379385
; GFX7-NEXT: s_setpc_b64 s[30:31]
380386
;
381387
; GFX8-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
382388
; GFX8: ; %bb.0:
383389
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384390
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
385391
; GFX8-NEXT: s_waitcnt vmcnt(0)
392+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
386393
; GFX8-NEXT: s_setpc_b64 s[30:31]
387394
;
388395
; GFX9-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
389396
; GFX9: ; %bb.0:
390397
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391398
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
392399
; GFX9-NEXT: s_waitcnt vmcnt(0)
400+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
393401
; GFX9-NEXT: s_setpc_b64 s[30:31]
394402
%load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2
395403
%cast = bitcast half %load to i16
@@ -406,27 +414,31 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr)
406414
; GFX6-NEXT: s_mov_b64 s[4:5], 0
407415
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
408416
; GFX6-NEXT: s_waitcnt vmcnt(0)
417+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
409418
; GFX6-NEXT: s_setpc_b64 s[30:31]
410419
;
411420
; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
412421
; GFX7: ; %bb.0:
413422
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414423
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
415424
; GFX7-NEXT: s_waitcnt vmcnt(0)
425+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
416426
; GFX7-NEXT: s_setpc_b64 s[30:31]
417427
;
418428
; GFX8-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
419429
; GFX8: ; %bb.0:
420430
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421431
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
422432
; GFX8-NEXT: s_waitcnt vmcnt(0)
433+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
423434
; GFX8-NEXT: s_setpc_b64 s[30:31]
424435
;
425436
; GFX9-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
426437
; GFX9: ; %bb.0:
427438
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428439
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
429440
; GFX9-NEXT: s_waitcnt vmcnt(0)
441+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
430442
; GFX9-NEXT: s_setpc_b64 s[30:31]
431443
%load = load atomic bfloat, ptr addrspace(1) %ptr monotonic, align 2
432444
%cast = bitcast bfloat %load to i16

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ define half @atomic_load_local_monotonic_f16(ptr addrspace(3) %ptr) {
234234
; GFX7-NEXT: s_mov_b32 m0, -1
235235
; GFX7-NEXT: ds_read_u16 v0, v0
236236
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
237+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
237238
; GFX7-NEXT: s_setpc_b64 s[30:31]
238239
;
239240
; GFX8-LABEL: atomic_load_local_monotonic_f16:
@@ -261,6 +262,7 @@ define bfloat @atomic_load_local_monotonic_bf16(ptr addrspace(3) %ptr) {
261262
; GFX7-NEXT: s_mov_b32 m0, -1
262263
; GFX7-NEXT: ds_read_u16 v0, v0
263264
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
265+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
264266
; GFX7-NEXT: s_setpc_b64 s[30:31]
265267
;
266268
; GFX8-LABEL: atomic_load_local_monotonic_bf16:
@@ -288,6 +290,7 @@ define i32 @atomic_load_local_monotonic_f16_zext_to_i32(ptr addrspace(3) %ptr) {
288290
; GFX7-NEXT: s_mov_b32 m0, -1
289291
; GFX7-NEXT: ds_read_u16 v0, v0
290292
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
293+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
291294
; GFX7-NEXT: s_setpc_b64 s[30:31]
292295
;
293296
; GFX8-LABEL: atomic_load_local_monotonic_f16_zext_to_i32:
@@ -296,13 +299,15 @@ define i32 @atomic_load_local_monotonic_f16_zext_to_i32(ptr addrspace(3) %ptr) {
296299
; GFX8-NEXT: s_mov_b32 m0, -1
297300
; GFX8-NEXT: ds_read_u16 v0, v0
298301
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
302+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
299303
; GFX8-NEXT: s_setpc_b64 s[30:31]
300304
;
301305
; GFX9-LABEL: atomic_load_local_monotonic_f16_zext_to_i32:
302306
; GFX9: ; %bb.0:
303307
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304308
; GFX9-NEXT: ds_read_u16 v0, v0
305309
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
310+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
306311
; GFX9-NEXT: s_setpc_b64 s[30:31]
307312
%load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
308313
%cast = bitcast half %load to i16
@@ -317,6 +322,7 @@ define i32 @atomic_load_local_monotonic_bf16_zext_to_i32(ptr addrspace(3) %ptr)
317322
; GFX7-NEXT: s_mov_b32 m0, -1
318323
; GFX7-NEXT: ds_read_u16 v0, v0
319324
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
325+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
320326
; GFX7-NEXT: s_setpc_b64 s[30:31]
321327
;
322328
; GFX8-LABEL: atomic_load_local_monotonic_bf16_zext_to_i32:
@@ -325,13 +331,15 @@ define i32 @atomic_load_local_monotonic_bf16_zext_to_i32(ptr addrspace(3) %ptr)
325331
; GFX8-NEXT: s_mov_b32 m0, -1
326332
; GFX8-NEXT: ds_read_u16 v0, v0
327333
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
334+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
328335
; GFX8-NEXT: s_setpc_b64 s[30:31]
329336
;
330337
; GFX9-LABEL: atomic_load_local_monotonic_bf16_zext_to_i32:
331338
; GFX9: ; %bb.0:
332339
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333340
; GFX9-NEXT: ds_read_u16 v0, v0
334341
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
342+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
335343
; GFX9-NEXT: s_setpc_b64 s[30:31]
336344
%load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
337345
%cast = bitcast bfloat %load to i16

0 commit comments

Comments
 (0)