Skip to content

Commit 5f28389

Browse files
committed
update ll tests
1 parent 3e669c8 commit 5f28389

File tree

173 files changed

+38024
-34247
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+38024
-34247
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 39 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
4040
; GFX7-LABEL: v_add_v2i16_fneg_lhs:
4141
; GFX7: ; %bb.0:
4242
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
44-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
45-
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
46-
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
47-
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
43+
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
44+
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
4845
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
4946
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
5047
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -79,11 +76,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
7976
; GFX7-LABEL: v_add_v2i16_fneg_rhs:
8077
; GFX7: ; %bb.0:
8178
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82-
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
83-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
84-
; GFX7-NEXT: v_or_b32_e32 v2, v3, v2
85-
; GFX7-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
86-
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
79+
; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2
80+
; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3
8781
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
8882
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
8983
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -118,18 +112,12 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
118112
; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
119113
; GFX7: ; %bb.0:
120114
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
122-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
123-
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
124-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3
125-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
126-
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
127-
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
128-
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
129-
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0
130-
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v1
131-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
132-
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3
115+
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
116+
; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2
117+
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -v1
118+
; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -v3
119+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
120+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
133121
; GFX7-NEXT: s_setpc_b64 s[30:31]
134122
;
135123
; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
@@ -434,17 +422,15 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
434422
define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
435423
; GFX7-LABEL: s_add_v2i16_fneg_lhs:
436424
; GFX7: ; %bb.0:
437-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
438-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
439-
; GFX7-NEXT: s_or_b32 s0, s1, s0
440-
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
441-
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
442-
; GFX7-NEXT: s_add_i32 s1, s1, s3
443-
; GFX7-NEXT: s_add_i32 s0, s0, s2
444-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
445-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
446-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
447-
; GFX7-NEXT: s_or_b32 s0, s0, s1
425+
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s1
426+
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0
427+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s3, v1
428+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
429+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
430+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
431+
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
432+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
433+
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
448434
; GFX7-NEXT: ; return to shader part epilog
449435
;
450436
; GFX9-LABEL: s_add_v2i16_fneg_lhs:
@@ -490,17 +476,15 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
490476
define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
491477
; GFX7-LABEL: s_add_v2i16_fneg_rhs:
492478
; GFX7: ; %bb.0:
493-
; GFX7-NEXT: s_lshl_b32 s3, s3, 16
494-
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
495-
; GFX7-NEXT: s_or_b32 s2, s3, s2
496-
; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
497-
; GFX7-NEXT: s_lshr_b32 s3, s2, 16
498-
; GFX7-NEXT: s_add_i32 s1, s1, s3
499-
; GFX7-NEXT: s_add_i32 s0, s0, s2
500-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
501-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
502-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
503-
; GFX7-NEXT: s_or_b32 s0, s0, s1
479+
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s3
480+
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s2
481+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s1, v1
482+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
483+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
484+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
485+
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
486+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
487+
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
504488
; GFX7-NEXT: ; return to shader part epilog
505489
;
506490
; GFX9-LABEL: s_add_v2i16_fneg_rhs:
@@ -546,22 +530,17 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
546530
define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
547531
; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
548532
; GFX7: ; %bb.0:
549-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
550-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
551-
; GFX7-NEXT: s_or_b32 s0, s1, s0
552-
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
553-
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
554-
; GFX7-NEXT: s_or_b32 s1, s1, s2
555-
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
556-
; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000
557-
; GFX7-NEXT: s_lshr_b32 s2, s0, 16
558-
; GFX7-NEXT: s_lshr_b32 s3, s1, 16
559-
; GFX7-NEXT: s_add_i32 s2, s2, s3
560-
; GFX7-NEXT: s_add_i32 s0, s0, s1
561-
; GFX7-NEXT: s_and_b32 s1, s2, 0xffff
562-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
563-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
564-
; GFX7-NEXT: s_or_b32 s0, s0, s1
533+
; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -s0
534+
; GFX7-NEXT: v_cvt_f16_f32_e64 v1, -s2
535+
; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -s1
536+
; GFX7-NEXT: v_cvt_f16_f32_e64 v3, -s3
537+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
538+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v3
539+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
540+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
541+
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
542+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
543+
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
565544
; GFX7-NEXT: ; return to shader part epilog
566545
;
567546
; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:

llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
; Tests for add.
77
; CHECK: name: addi32
8-
; CHECK: {{%[0-9]+}}:_(s32) = G_ADD
8+
; CHECK: {{%[0-9]+}}:_(i32) = G_ADD
99
define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) {
1010
%res = add i32 %arg1, %arg2
1111
store i32 %res, ptr addrspace(1) undef

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,7 @@ define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
695695
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
696696
; GFX6-NEXT: s_sext_i32_i16 s0, s0
697697
; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
698+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
698699
; GFX6-NEXT: ; return to shader part epilog
699700
;
700701
; GFX8-LABEL: ashr_i16_sv:
@@ -722,6 +723,7 @@ define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
722723
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
723724
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
724725
; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
726+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
725727
; GFX6-NEXT: ; return to shader part epilog
726728
;
727729
; GFX8-LABEL: ashr_i16_vs:

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,23 +98,53 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
9898
}
9999

100100
define half @atomic_load_flat_monotonic_f16(ptr %ptr) {
101-
; GCN-LABEL: atomic_load_flat_monotonic_f16:
102-
; GCN: ; %bb.0:
103-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
105-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106-
; GCN-NEXT: s_setpc_b64 s[30:31]
101+
; GFX7-LABEL: atomic_load_flat_monotonic_f16:
102+
; GFX7: ; %bb.0:
103+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
105+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
107+
; GFX7-NEXT: s_setpc_b64 s[30:31]
108+
;
109+
; GFX8-LABEL: atomic_load_flat_monotonic_f16:
110+
; GFX8: ; %bb.0:
111+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
113+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
;
116+
; GFX9-LABEL: atomic_load_flat_monotonic_f16:
117+
; GFX9: ; %bb.0:
118+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
120+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
121+
; GFX9-NEXT: s_setpc_b64 s[30:31]
107122
%load = load atomic half, ptr %ptr monotonic, align 2
108123
ret half %load
109124
}
110125

111126
define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) {
112-
; GCN-LABEL: atomic_load_flat_monotonic_bf16:
113-
; GCN: ; %bb.0:
114-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
116-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
117-
; GCN-NEXT: s_setpc_b64 s[30:31]
127+
; GFX7-LABEL: atomic_load_flat_monotonic_bf16:
128+
; GFX7: ; %bb.0:
129+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
131+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
132+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
133+
; GFX7-NEXT: s_setpc_b64 s[30:31]
134+
;
135+
; GFX8-LABEL: atomic_load_flat_monotonic_bf16:
136+
; GFX8: ; %bb.0:
137+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
139+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
140+
; GFX8-NEXT: s_setpc_b64 s[30:31]
141+
;
142+
; GFX9-LABEL: atomic_load_flat_monotonic_bf16:
143+
; GFX9: ; %bb.0:
144+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
146+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
147+
; GFX9-NEXT: s_setpc_b64 s[30:31]
118148
%load = load atomic bfloat, ptr %ptr monotonic, align 2
119149
ret bfloat %load
120150
}
@@ -125,6 +155,7 @@ define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
125155
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126156
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
127157
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
158+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
128159
; GCN-NEXT: s_setpc_b64 s[30:31]
129160
%load = load atomic half, ptr %ptr monotonic, align 2
130161
%cast = bitcast half %load to i16
@@ -138,6 +169,7 @@ define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
138169
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139170
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
140171
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
172+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
141173
; GCN-NEXT: s_setpc_b64 s[30:31]
142174
%load = load atomic bfloat, ptr %ptr monotonic, align 2
143175
%cast = bitcast bfloat %load to i16

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,15 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
299299
; GFX6-NEXT: s_mov_b64 s[4:5], 0
300300
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
301301
; GFX6-NEXT: s_waitcnt vmcnt(0)
302+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
302303
; GFX6-NEXT: s_setpc_b64 s[30:31]
303304
;
304305
; GFX7-LABEL: atomic_load_global_monotonic_f16:
305306
; GFX7: ; %bb.0:
306307
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307308
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
308309
; GFX7-NEXT: s_waitcnt vmcnt(0)
310+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
309311
; GFX7-NEXT: s_setpc_b64 s[30:31]
310312
;
311313
; GFX8-LABEL: atomic_load_global_monotonic_f16:
@@ -334,13 +336,15 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
334336
; GFX6-NEXT: s_mov_b64 s[4:5], 0
335337
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
336338
; GFX6-NEXT: s_waitcnt vmcnt(0)
339+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
337340
; GFX6-NEXT: s_setpc_b64 s[30:31]
338341
;
339342
; GFX7-LABEL: atomic_load_global_monotonic_bf16:
340343
; GFX7: ; %bb.0:
341344
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342345
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
343346
; GFX7-NEXT: s_waitcnt vmcnt(0)
347+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
344348
; GFX7-NEXT: s_setpc_b64 s[30:31]
345349
;
346350
; GFX8-LABEL: atomic_load_global_monotonic_bf16:
@@ -369,27 +373,31 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr)
369373
; GFX6-NEXT: s_mov_b64 s[4:5], 0
370374
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
371375
; GFX6-NEXT: s_waitcnt vmcnt(0)
376+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
372377
; GFX6-NEXT: s_setpc_b64 s[30:31]
373378
;
374379
; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
375380
; GFX7: ; %bb.0:
376381
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377382
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
378383
; GFX7-NEXT: s_waitcnt vmcnt(0)
384+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
379385
; GFX7-NEXT: s_setpc_b64 s[30:31]
380386
;
381387
; GFX8-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
382388
; GFX8: ; %bb.0:
383389
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384390
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
385391
; GFX8-NEXT: s_waitcnt vmcnt(0)
392+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
386393
; GFX8-NEXT: s_setpc_b64 s[30:31]
387394
;
388395
; GFX9-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
389396
; GFX9: ; %bb.0:
390397
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391398
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
392399
; GFX9-NEXT: s_waitcnt vmcnt(0)
400+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
393401
; GFX9-NEXT: s_setpc_b64 s[30:31]
394402
%load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2
395403
%cast = bitcast half %load to i16
@@ -406,27 +414,31 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr)
406414
; GFX6-NEXT: s_mov_b64 s[4:5], 0
407415
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
408416
; GFX6-NEXT: s_waitcnt vmcnt(0)
417+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
409418
; GFX6-NEXT: s_setpc_b64 s[30:31]
410419
;
411420
; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
412421
; GFX7: ; %bb.0:
413422
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414423
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
415424
; GFX7-NEXT: s_waitcnt vmcnt(0)
425+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
416426
; GFX7-NEXT: s_setpc_b64 s[30:31]
417427
;
418428
; GFX8-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
419429
; GFX8: ; %bb.0:
420430
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421431
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
422432
; GFX8-NEXT: s_waitcnt vmcnt(0)
433+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
423434
; GFX8-NEXT: s_setpc_b64 s[30:31]
424435
;
425436
; GFX9-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
426437
; GFX9: ; %bb.0:
427438
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428439
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
429440
; GFX9-NEXT: s_waitcnt vmcnt(0)
441+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
430442
; GFX9-NEXT: s_setpc_b64 s[30:31]
431443
%load = load atomic bfloat, ptr addrspace(1) %ptr monotonic, align 2
432444
%cast = bitcast bfloat %load to i16

0 commit comments

Comments
 (0)