Skip to content

Commit ea2efd0

Browse files
committed
update ll tests
1 parent 28b8610 commit ea2efd0

File tree

162 files changed

+37691
-34102
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+37691
-34102
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 38 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,10 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
4040
; GFX7-LABEL: v_add_v2i16_fneg_lhs:
4141
; GFX7: ; %bb.0:
4242
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
43+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
4444
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
45-
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
45+
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
46+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
4647
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
4748
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
4849
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
@@ -52,7 +53,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
5253
; GFX9-LABEL: v_add_v2i16_fneg_lhs:
5354
; GFX9: ; %bb.0:
5455
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
56+
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
57+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
5658
; GFX9-NEXT: s_setpc_b64 s[30:31]
5759
;
5860
; GFX8-LABEL: v_add_v2i16_fneg_lhs:
@@ -67,7 +69,8 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
6769
; GFX10-LABEL: v_add_v2i16_fneg_lhs:
6870
; GFX10: ; %bb.0:
6971
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
72+
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
73+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
7174
; GFX10-NEXT: s_setpc_b64 s[30:31]
7275
%neg.a = fneg <2 x half> %a
7376
%cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
@@ -79,9 +82,10 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
7982
; GFX7-LABEL: v_add_v2i16_fneg_rhs:
8083
; GFX7: ; %bb.0:
8184
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82-
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
85+
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3
8386
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
84-
; GFX7-NEXT: v_or_b32_e32 v2, v3, v2
87+
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
88+
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
8589
; GFX7-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
8690
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
8791
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
@@ -91,7 +95,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
9195
; GFX9-LABEL: v_add_v2i16_fneg_rhs:
9296
; GFX9: ; %bb.0:
9397
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
98+
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
99+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
95100
; GFX9-NEXT: s_setpc_b64 s[30:31]
96101
;
97102
; GFX8-LABEL: v_add_v2i16_fneg_rhs:
@@ -106,7 +111,8 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
106111
; GFX10-LABEL: v_add_v2i16_fneg_rhs:
107112
; GFX10: ; %bb.0:
108113
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
114+
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
115+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
110116
; GFX10-NEXT: s_setpc_b64 s[30:31]
111117
%neg.b = fneg <2 x half> %b
112118
%cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
@@ -118,11 +124,13 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
118124
; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
119125
; GFX7: ; %bb.0:
120126
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
127+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
122128
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
123-
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
124-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3
125-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
129+
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
130+
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
131+
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2
132+
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v3
133+
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
126134
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
127135
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
128136
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
@@ -135,7 +143,9 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
135143
; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
136144
; GFX9: ; %bb.0:
137145
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138-
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
146+
; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
147+
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
148+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
139149
; GFX9-NEXT: s_setpc_b64 s[30:31]
140150
;
141151
; GFX8-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
@@ -151,7 +161,9 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
151161
; GFX10-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
152162
; GFX10: ; %bb.0:
153163
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154-
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
164+
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
165+
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
166+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
155167
; GFX10-NEXT: s_setpc_b64 s[30:31]
156168
%neg.a = fneg <2 x half> %a
157169
%neg.b = fneg <2 x half> %b
@@ -434,9 +446,10 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
434446
define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
435447
; GFX7-LABEL: s_add_v2i16_fneg_lhs:
436448
; GFX7: ; %bb.0:
449+
; GFX7-NEXT: s_and_b32 s1, 0xffff, s1
450+
; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
437451
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
438-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
439-
; GFX7-NEXT: s_or_b32 s0, s1, s0
452+
; GFX7-NEXT: s_or_b32 s0, s0, s1
440453
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
441454
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
442455
; GFX7-NEXT: s_add_i32 s1, s1, s3
@@ -490,9 +503,10 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
490503
define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
491504
; GFX7-LABEL: s_add_v2i16_fneg_rhs:
492505
; GFX7: ; %bb.0:
506+
; GFX7-NEXT: s_and_b32 s3, 0xffff, s3
507+
; GFX7-NEXT: s_and_b32 s2, 0xffff, s2
493508
; GFX7-NEXT: s_lshl_b32 s3, s3, 16
494-
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
495-
; GFX7-NEXT: s_or_b32 s2, s3, s2
509+
; GFX7-NEXT: s_or_b32 s2, s2, s3
496510
; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
497511
; GFX7-NEXT: s_lshr_b32 s3, s2, 16
498512
; GFX7-NEXT: s_add_i32 s1, s1, s3
@@ -546,11 +560,13 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
546560
define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
547561
; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
548562
; GFX7: ; %bb.0:
563+
; GFX7-NEXT: s_and_b32 s1, 0xffff, s1
564+
; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
549565
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
550-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
551-
; GFX7-NEXT: s_or_b32 s0, s1, s0
552-
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
553-
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
566+
; GFX7-NEXT: s_or_b32 s0, s0, s1
567+
; GFX7-NEXT: s_and_b32 s1, 0xffff, s2
568+
; GFX7-NEXT: s_and_b32 s2, 0xffff, s3
569+
; GFX7-NEXT: s_lshl_b32 s2, s2, 16
554570
; GFX7-NEXT: s_or_b32 s1, s1, s2
555571
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
556572
; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000

llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
; Tests for add.
77
; CHECK: name: addi32
8-
; CHECK: {{%[0-9]+}}:_(s32) = G_ADD
8+
; CHECK: {{%[0-9]+}}:_(i32) = G_ADD
99
define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) {
1010
%res = add i32 %arg1, %arg2
1111
store i32 %res, ptr addrspace(1) undef

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -695,6 +695,7 @@ define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
695695
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
696696
; GFX6-NEXT: s_sext_i32_i16 s0, s0
697697
; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0
698+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
698699
; GFX6-NEXT: ; return to shader part epilog
699700
;
700701
; GFX8-LABEL: ashr_i16_sv:
@@ -722,6 +723,7 @@ define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
722723
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
723724
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
724725
; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0
726+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
725727
; GFX6-NEXT: ; return to shader part epilog
726728
;
727729
; GFX8-LABEL: ashr_i16_vs:

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll

Lines changed: 44 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -98,23 +98,53 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
9898
}
9999

100100
define half @atomic_load_flat_monotonic_f16(ptr %ptr) {
101-
; GCN-LABEL: atomic_load_flat_monotonic_f16:
102-
; GCN: ; %bb.0:
103-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
105-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106-
; GCN-NEXT: s_setpc_b64 s[30:31]
101+
; GFX7-LABEL: atomic_load_flat_monotonic_f16:
102+
; GFX7: ; %bb.0:
103+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
105+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
107+
; GFX7-NEXT: s_setpc_b64 s[30:31]
108+
;
109+
; GFX8-LABEL: atomic_load_flat_monotonic_f16:
110+
; GFX8: ; %bb.0:
111+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
113+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
114+
; GFX8-NEXT: s_setpc_b64 s[30:31]
115+
;
116+
; GFX9-LABEL: atomic_load_flat_monotonic_f16:
117+
; GFX9: ; %bb.0:
118+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
120+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
121+
; GFX9-NEXT: s_setpc_b64 s[30:31]
107122
%load = load atomic half, ptr %ptr monotonic, align 2
108123
ret half %load
109124
}
110125

111126
define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) {
112-
; GCN-LABEL: atomic_load_flat_monotonic_bf16:
113-
; GCN: ; %bb.0:
114-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115-
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
116-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
117-
; GCN-NEXT: s_setpc_b64 s[30:31]
127+
; GFX7-LABEL: atomic_load_flat_monotonic_bf16:
128+
; GFX7: ; %bb.0:
129+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
131+
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
132+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
133+
; GFX7-NEXT: s_setpc_b64 s[30:31]
134+
;
135+
; GFX8-LABEL: atomic_load_flat_monotonic_bf16:
136+
; GFX8: ; %bb.0:
137+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
139+
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
140+
; GFX8-NEXT: s_setpc_b64 s[30:31]
141+
;
142+
; GFX9-LABEL: atomic_load_flat_monotonic_bf16:
143+
; GFX9: ; %bb.0:
144+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
146+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
147+
; GFX9-NEXT: s_setpc_b64 s[30:31]
118148
%load = load atomic bfloat, ptr %ptr monotonic, align 2
119149
ret bfloat %load
120150
}
@@ -125,6 +155,7 @@ define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
125155
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126156
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
127157
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
158+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
128159
; GCN-NEXT: s_setpc_b64 s[30:31]
129160
%load = load atomic half, ptr %ptr monotonic, align 2
130161
%cast = bitcast half %load to i16
@@ -138,6 +169,7 @@ define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
138169
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139170
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
140171
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
172+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
141173
; GCN-NEXT: s_setpc_b64 s[30:31]
142174
%load = load atomic bfloat, ptr %ptr monotonic, align 2
143175
%cast = bitcast bfloat %load to i16

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -299,13 +299,15 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
299299
; GFX6-NEXT: s_mov_b64 s[4:5], 0
300300
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
301301
; GFX6-NEXT: s_waitcnt vmcnt(0)
302+
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
302303
; GFX6-NEXT: s_setpc_b64 s[30:31]
303304
;
304305
; GFX7-LABEL: atomic_load_global_monotonic_f16:
305306
; GFX7: ; %bb.0:
306307
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307308
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
308309
; GFX7-NEXT: s_waitcnt vmcnt(0)
310+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
309311
; GFX7-NEXT: s_setpc_b64 s[30:31]
310312
;
311313
; GFX8-LABEL: atomic_load_global_monotonic_f16:
@@ -334,13 +336,15 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
334336
; GFX6-NEXT: s_mov_b64 s[4:5], 0
335337
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
336338
; GFX6-NEXT: s_waitcnt vmcnt(0)
339+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
337340
; GFX6-NEXT: s_setpc_b64 s[30:31]
338341
;
339342
; GFX7-LABEL: atomic_load_global_monotonic_bf16:
340343
; GFX7: ; %bb.0:
341344
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342345
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
343346
; GFX7-NEXT: s_waitcnt vmcnt(0)
347+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
344348
; GFX7-NEXT: s_setpc_b64 s[30:31]
345349
;
346350
; GFX8-LABEL: atomic_load_global_monotonic_bf16:
@@ -369,27 +373,31 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr)
369373
; GFX6-NEXT: s_mov_b64 s[4:5], 0
370374
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
371375
; GFX6-NEXT: s_waitcnt vmcnt(0)
376+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
372377
; GFX6-NEXT: s_setpc_b64 s[30:31]
373378
;
374379
; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
375380
; GFX7: ; %bb.0:
376381
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377382
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
378383
; GFX7-NEXT: s_waitcnt vmcnt(0)
384+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
379385
; GFX7-NEXT: s_setpc_b64 s[30:31]
380386
;
381387
; GFX8-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
382388
; GFX8: ; %bb.0:
383389
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384390
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
385391
; GFX8-NEXT: s_waitcnt vmcnt(0)
392+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
386393
; GFX8-NEXT: s_setpc_b64 s[30:31]
387394
;
388395
; GFX9-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
389396
; GFX9: ; %bb.0:
390397
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391398
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
392399
; GFX9-NEXT: s_waitcnt vmcnt(0)
400+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
393401
; GFX9-NEXT: s_setpc_b64 s[30:31]
394402
%load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2
395403
%cast = bitcast half %load to i16
@@ -406,27 +414,31 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr)
406414
; GFX6-NEXT: s_mov_b64 s[4:5], 0
407415
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
408416
; GFX6-NEXT: s_waitcnt vmcnt(0)
417+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
409418
; GFX6-NEXT: s_setpc_b64 s[30:31]
410419
;
411420
; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
412421
; GFX7: ; %bb.0:
413422
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414423
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
415424
; GFX7-NEXT: s_waitcnt vmcnt(0)
425+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
416426
; GFX7-NEXT: s_setpc_b64 s[30:31]
417427
;
418428
; GFX8-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
419429
; GFX8: ; %bb.0:
420430
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421431
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
422432
; GFX8-NEXT: s_waitcnt vmcnt(0)
433+
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
423434
; GFX8-NEXT: s_setpc_b64 s[30:31]
424435
;
425436
; GFX9-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
426437
; GFX9: ; %bb.0:
427438
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428439
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
429440
; GFX9-NEXT: s_waitcnt vmcnt(0)
441+
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
430442
; GFX9-NEXT: s_setpc_b64 s[30:31]
431443
%load = load atomic bfloat, ptr addrspace(1) %ptr monotonic, align 2
432444
%cast = bitcast bfloat %load to i16

0 commit comments

Comments
 (0)