Skip to content

Commit ef7dbb4

Browse files
committed
regen two lit tests
1 parent 1a25f16 commit ef7dbb4

File tree

2 files changed

+164
-154
lines changed

2 files changed

+164
-154
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,19 +1114,23 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
11141114
; GCN-NEXT: s_waitcnt lgkmcnt(0)
11151115
; GCN-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
11161116
; GCN-NEXT: s_mov_b32 s3, 0xf000
1117-
; GCN-NEXT: s_mov_b32 s2, -1
11181117
; GCN-NEXT: s_waitcnt lgkmcnt(0)
11191118
; GCN-NEXT: v_add_f64 v[0:1], s[4:5], 1.0
1120-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
1121-
; GCN-NEXT: v_add_i32_e32 v1, vcc, 2, v1
1122-
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
1123-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 2, v0
1124-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
1125-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
1126-
; GCN-NEXT: v_or_b32_e32 v1, v2, v1
1127-
; GCN-NEXT: v_or_b32_e32 v0, v3, v0
1128-
; GCN-NEXT: v_add_i32_e32 v1, vcc, 0x20000, v1
1129-
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x20000, v0
1119+
; GCN-NEXT: v_readfirstlane_b32 s2, v0
1120+
; GCN-NEXT: v_readfirstlane_b32 s4, v1
1121+
; GCN-NEXT: s_and_b32 s5, s4, 0xffff0000
1122+
; GCN-NEXT: s_add_i32 s4, s4, 2
1123+
; GCN-NEXT: s_and_b32 s6, s2, 0xffff0000
1124+
; GCN-NEXT: s_add_i32 s2, s2, 2
1125+
; GCN-NEXT: s_and_b32 s4, s4, 0xffff
1126+
; GCN-NEXT: s_and_b32 s2, s2, 0xffff
1127+
; GCN-NEXT: s_or_b32 s4, s5, s4
1128+
; GCN-NEXT: s_or_b32 s2, s6, s2
1129+
; GCN-NEXT: s_add_i32 s4, s4, 0x20000
1130+
; GCN-NEXT: s_add_i32 s5, s2, 0x20000
1131+
; GCN-NEXT: s_mov_b32 s2, -1
1132+
; GCN-NEXT: v_mov_b32_e32 v0, s5
1133+
; GCN-NEXT: v_mov_b32_e32 v1, s4
11301134
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
11311135
; GCN-NEXT: s_endpgm
11321136
;
@@ -1139,14 +1143,20 @@ define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
11391143
; VI-NEXT: v_mov_b32_e32 v3, s1
11401144
; VI-NEXT: s_waitcnt lgkmcnt(0)
11411145
; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1.0
1142-
; VI-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
1143-
; VI-NEXT: v_add_u32_e32 v0, vcc, 2, v0
1144-
; VI-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
1145-
; VI-NEXT: v_add_u32_e32 v1, vcc, 2, v1
1146-
; VI-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1147-
; VI-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1148-
; VI-NEXT: v_add_u32_e32 v1, vcc, 0x20000, v1
1149-
; VI-NEXT: v_add_u32_e32 v0, vcc, 0x20000, v0
1146+
; VI-NEXT: v_readfirstlane_b32 s0, v1
1147+
; VI-NEXT: v_readfirstlane_b32 s1, v0
1148+
; VI-NEXT: s_and_b32 s2, s1, 0xffff0000
1149+
; VI-NEXT: s_add_i32 s1, s1, 2
1150+
; VI-NEXT: s_and_b32 s3, s0, 0xffff0000
1151+
; VI-NEXT: s_add_i32 s0, s0, 2
1152+
; VI-NEXT: s_and_b32 s0, s0, 0xffff
1153+
; VI-NEXT: s_and_b32 s1, s1, 0xffff
1154+
; VI-NEXT: s_or_b32 s0, s3, s0
1155+
; VI-NEXT: s_or_b32 s1, s2, s1
1156+
; VI-NEXT: s_add_i32 s0, s0, 0x20000
1157+
; VI-NEXT: s_add_i32 s1, s1, 0x20000
1158+
; VI-NEXT: v_mov_b32_e32 v0, s1
1159+
; VI-NEXT: v_mov_b32_e32 v1, s0
11501160
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
11511161
; VI-NEXT: s_endpgm
11521162
;

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info-multi-entry.ll

Lines changed: 135 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -45,151 +45,151 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg noundef
4545
; GFX942-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
4646
; GFX942-NEXT: v_trunc_f32_e32 v1, v1
4747
; GFX942-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
48-
; GFX942-NEXT: v_cvt_u32_f32_e32 v4, v1
49-
; GFX942-NEXT: v_cvt_u32_f32_e32 v5, v0
50-
; GFX942-NEXT: v_mul_lo_u32 v0, s1, v4
51-
; GFX942-NEXT: v_mul_hi_u32 v2, s1, v5
52-
; GFX942-NEXT: v_mul_lo_u32 v1, s3, v5
53-
; GFX942-NEXT: v_add_u32_e32 v0, v2, v0
54-
; GFX942-NEXT: v_mul_lo_u32 v6, s1, v5
55-
; GFX942-NEXT: v_add_u32_e32 v7, v0, v1
56-
; GFX942-NEXT: v_mul_hi_u32 v3, v5, v7
57-
; GFX942-NEXT: v_mul_lo_u32 v2, v5, v7
58-
; GFX942-NEXT: v_mul_hi_u32 v0, v5, v6
59-
; GFX942-NEXT: v_mov_b32_e32 v1, 0
60-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
61-
; GFX942-NEXT: v_mul_hi_u32 v0, v4, v6
62-
; GFX942-NEXT: v_mul_lo_u32 v6, v4, v6
63-
; GFX942-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
64-
; GFX942-NEXT: v_mul_hi_u32 v8, v4, v7
65-
; GFX942-NEXT: s_nop 0
66-
; GFX942-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc
67-
; GFX942-NEXT: v_mul_lo_u32 v2, v4, v7
68-
; GFX942-NEXT: s_nop 0
69-
; GFX942-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v8, vcc
70-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
71-
; GFX942-NEXT: v_add_co_u32_e32 v5, vcc, v5, v2
72-
; GFX942-NEXT: v_mul_hi_u32 v2, s1, v5
73-
; GFX942-NEXT: s_nop 0
74-
; GFX942-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v3, vcc
75-
; GFX942-NEXT: v_mul_lo_u32 v0, s1, v4
76-
; GFX942-NEXT: v_add_u32_e32 v0, v2, v0
77-
; GFX942-NEXT: v_mul_lo_u32 v2, s3, v5
78-
; GFX942-NEXT: v_add_u32_e32 v6, v0, v2
79-
; GFX942-NEXT: v_mul_lo_u32 v0, s1, v5
80-
; GFX942-NEXT: v_mul_hi_u32 v8, v4, v0
81-
; GFX942-NEXT: v_mul_lo_u32 v9, v4, v0
82-
; GFX942-NEXT: v_mul_hi_u32 v3, v5, v6
83-
; GFX942-NEXT: v_mul_lo_u32 v2, v5, v6
84-
; GFX942-NEXT: v_mul_hi_u32 v0, v5, v0
85-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
86-
; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, v2, v9
87-
; GFX942-NEXT: v_mul_hi_u32 v7, v4, v6
88-
; GFX942-NEXT: s_nop 0
89-
; GFX942-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v8, vcc
90-
; GFX942-NEXT: v_mul_lo_u32 v2, v4, v6
91-
; GFX942-NEXT: s_nop 0
92-
; GFX942-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v7, vcc
93-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
94-
; GFX942-NEXT: v_add_co_u32_e32 v5, vcc, v5, v2
95-
; GFX942-NEXT: v_mul_hi_u32 v0, s6, v5
96-
; GFX942-NEXT: s_nop 0
97-
; GFX942-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v3, vcc
98-
; GFX942-NEXT: v_mul_hi_u32 v3, s6, v4
99-
; GFX942-NEXT: v_mul_lo_u32 v2, s6, v4
100-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
101-
; GFX942-NEXT: v_mul_hi_u32 v0, s7, v5
102-
; GFX942-NEXT: v_mul_lo_u32 v5, s7, v5
103-
; GFX942-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
104-
; GFX942-NEXT: v_mul_hi_u32 v6, s7, v4
105-
; GFX942-NEXT: s_nop 0
106-
; GFX942-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc
107-
; GFX942-NEXT: v_mul_lo_u32 v2, s7, v4
108-
; GFX942-NEXT: s_nop 0
109-
; GFX942-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v6, vcc
110-
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
111-
; GFX942-NEXT: v_mul_lo_u32 v2, s12, v1
112-
; GFX942-NEXT: v_mul_hi_u32 v3, s12, v0
113-
; GFX942-NEXT: v_add_u32_e32 v2, v3, v2
114-
; GFX942-NEXT: v_mul_lo_u32 v3, s13, v0
115-
; GFX942-NEXT: v_add_u32_e32 v6, v2, v3
116-
; GFX942-NEXT: v_mul_lo_u32 v3, s12, v0
117-
; GFX942-NEXT: v_sub_u32_e32 v2, s7, v6
118-
; GFX942-NEXT: v_mov_b32_e32 v4, s13
119-
; GFX942-NEXT: v_sub_co_u32_e32 v7, vcc, s6, v3
120-
; GFX942-NEXT: s_nop 1
121-
; GFX942-NEXT: v_subb_co_u32_e64 v2, s[8:9], v2, v4, vcc
122-
; GFX942-NEXT: v_subrev_co_u32_e64 v3, s[8:9], s12, v7
123-
; GFX942-NEXT: s_nop 1
124-
; GFX942-NEXT: v_subbrev_co_u32_e64 v2, s[8:9], 0, v2, s[8:9]
125-
; GFX942-NEXT: v_cmp_le_u32_e64 s[8:9], s13, v2
126-
; GFX942-NEXT: s_nop 1
127-
; GFX942-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[8:9]
128-
; GFX942-NEXT: v_cmp_le_u32_e64 s[8:9], s12, v3
129-
; GFX942-NEXT: s_nop 1
130-
; GFX942-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[8:9]
131-
; GFX942-NEXT: v_cmp_eq_u32_e64 s[8:9], s13, v2
132-
; GFX942-NEXT: s_nop 1
133-
; GFX942-NEXT: v_cndmask_b32_e64 v8, v4, v3, s[8:9]
134-
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, 1
135-
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, 2
136-
; GFX942-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v8
137-
; GFX942-NEXT: s_nop 1
138-
; GFX942-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[8:9]
139-
; GFX942-NEXT: v_mov_b32_e32 v4, s7
140-
; GFX942-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v6, vcc
141-
; GFX942-NEXT: v_cmp_le_u32_e32 vcc, s13, v4
142-
; GFX942-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[8:9]
143-
; GFX942-NEXT: s_nop 0
144-
; GFX942-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
145-
; GFX942-NEXT: v_cmp_le_u32_e32 vcc, s12, v7
146-
; GFX942-NEXT: s_nop 1
147-
; GFX942-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
148-
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, s13, v4
149-
; GFX942-NEXT: s_nop 1
150-
; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
151-
; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
152-
; GFX942-NEXT: s_nop 1
153-
; GFX942-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
154-
; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
48+
; GFX942-NEXT: v_cvt_u32_f32_e32 v1, v1
49+
; GFX942-NEXT: v_cvt_u32_f32_e32 v0, v0
50+
; GFX942-NEXT: v_readfirstlane_b32 s5, v1
51+
; GFX942-NEXT: v_readfirstlane_b32 s8, v0
52+
; GFX942-NEXT: s_mul_i32 s9, s1, s5
53+
; GFX942-NEXT: s_mul_hi_u32 s15, s1, s8
54+
; GFX942-NEXT: s_mul_i32 s14, s3, s8
55+
; GFX942-NEXT: s_add_i32 s9, s15, s9
56+
; GFX942-NEXT: s_add_i32 s9, s9, s14
57+
; GFX942-NEXT: s_mul_i32 s16, s1, s8
58+
; GFX942-NEXT: s_mul_hi_u32 s14, s8, s9
59+
; GFX942-NEXT: s_mul_i32 s15, s8, s9
60+
; GFX942-NEXT: s_mul_hi_u32 s8, s8, s16
61+
; GFX942-NEXT: s_add_u32 s8, s8, s15
62+
; GFX942-NEXT: s_addc_u32 s14, 0, s14
63+
; GFX942-NEXT: s_mul_hi_u32 s17, s5, s16
64+
; GFX942-NEXT: s_mul_i32 s16, s5, s16
65+
; GFX942-NEXT: s_add_u32 s8, s8, s16
66+
; GFX942-NEXT: s_mul_hi_u32 s15, s5, s9
67+
; GFX942-NEXT: s_addc_u32 s8, s14, s17
68+
; GFX942-NEXT: s_addc_u32 s14, s15, 0
69+
; GFX942-NEXT: s_mul_i32 s9, s5, s9
70+
; GFX942-NEXT: s_add_u32 s8, s8, s9
71+
; GFX942-NEXT: s_addc_u32 s9, 0, s14
72+
; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, s8, v0
73+
; GFX942-NEXT: s_cmp_lg_u64 vcc, 0
74+
; GFX942-NEXT: s_addc_u32 s5, s5, s9
75+
; GFX942-NEXT: v_readfirstlane_b32 s9, v0
76+
; GFX942-NEXT: s_mul_i32 s8, s1, s5
77+
; GFX942-NEXT: s_mul_hi_u32 s14, s1, s9
78+
; GFX942-NEXT: s_add_i32 s8, s14, s8
79+
; GFX942-NEXT: s_mul_i32 s3, s3, s9
80+
; GFX942-NEXT: s_add_i32 s8, s8, s3
81+
; GFX942-NEXT: s_mul_i32 s1, s1, s9
82+
; GFX942-NEXT: s_mul_hi_u32 s14, s5, s1
83+
; GFX942-NEXT: s_mul_i32 s15, s5, s1
84+
; GFX942-NEXT: s_mul_i32 s17, s9, s8
85+
; GFX942-NEXT: s_mul_hi_u32 s1, s9, s1
86+
; GFX942-NEXT: s_mul_hi_u32 s16, s9, s8
87+
; GFX942-NEXT: s_add_u32 s1, s1, s17
88+
; GFX942-NEXT: s_addc_u32 s9, 0, s16
89+
; GFX942-NEXT: s_add_u32 s1, s1, s15
90+
; GFX942-NEXT: s_mul_hi_u32 s3, s5, s8
91+
; GFX942-NEXT: s_addc_u32 s1, s9, s14
92+
; GFX942-NEXT: s_addc_u32 s3, s3, 0
93+
; GFX942-NEXT: s_mul_i32 s8, s5, s8
94+
; GFX942-NEXT: s_add_u32 s1, s1, s8
95+
; GFX942-NEXT: s_addc_u32 s3, 0, s3
96+
; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, s1, v0
97+
; GFX942-NEXT: s_cmp_lg_u64 vcc, 0
98+
; GFX942-NEXT: s_addc_u32 s1, s5, s3
99+
; GFX942-NEXT: v_readfirstlane_b32 s8, v0
100+
; GFX942-NEXT: s_mul_i32 s5, s6, s1
101+
; GFX942-NEXT: s_mul_hi_u32 s9, s6, s8
102+
; GFX942-NEXT: s_mul_hi_u32 s3, s6, s1
103+
; GFX942-NEXT: s_add_u32 s5, s9, s5
104+
; GFX942-NEXT: s_addc_u32 s3, 0, s3
105+
; GFX942-NEXT: s_mul_hi_u32 s14, s7, s8
106+
; GFX942-NEXT: s_mul_i32 s8, s7, s8
107+
; GFX942-NEXT: s_add_u32 s5, s5, s8
108+
; GFX942-NEXT: s_mul_hi_u32 s9, s7, s1
109+
; GFX942-NEXT: s_addc_u32 s3, s3, s14
110+
; GFX942-NEXT: s_addc_u32 s5, s9, 0
111+
; GFX942-NEXT: s_mul_i32 s1, s7, s1
112+
; GFX942-NEXT: s_add_u32 s1, s3, s1
113+
; GFX942-NEXT: s_addc_u32 s3, 0, s5
114+
; GFX942-NEXT: s_mul_i32 s5, s12, s3
115+
; GFX942-NEXT: s_mul_hi_u32 s8, s12, s1
116+
; GFX942-NEXT: s_add_i32 s5, s8, s5
117+
; GFX942-NEXT: s_mul_i32 s8, s13, s1
118+
; GFX942-NEXT: s_mul_i32 s9, s12, s1
119+
; GFX942-NEXT: s_add_i32 s5, s5, s8
120+
; GFX942-NEXT: v_mov_b32_e32 v0, s9
121+
; GFX942-NEXT: s_sub_i32 s8, s7, s5
122+
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, s6, v0
123+
; GFX942-NEXT: s_cmp_lg_u64 vcc, 0
124+
; GFX942-NEXT: s_subb_u32 s14, s8, s13
125+
; GFX942-NEXT: v_subrev_co_u32_e64 v1, s[8:9], s12, v0
126+
; GFX942-NEXT: s_cmp_lg_u64 s[8:9], 0
127+
; GFX942-NEXT: s_subb_u32 s8, s14, 0
128+
; GFX942-NEXT: s_cmp_ge_u32 s8, s13
129+
; GFX942-NEXT: v_readfirstlane_b32 s14, v1
130+
; GFX942-NEXT: s_cselect_b32 s9, -1, 0
131+
; GFX942-NEXT: s_cmp_ge_u32 s14, s12
132+
; GFX942-NEXT: s_cselect_b32 s14, -1, 0
133+
; GFX942-NEXT: s_cmp_eq_u32 s8, s13
134+
; GFX942-NEXT: s_cselect_b32 s8, s14, s9
135+
; GFX942-NEXT: s_add_u32 s9, s1, 1
136+
; GFX942-NEXT: s_addc_u32 s14, s3, 0
137+
; GFX942-NEXT: s_add_u32 s15, s1, 2
138+
; GFX942-NEXT: s_addc_u32 s16, s3, 0
139+
; GFX942-NEXT: s_cmp_lg_u32 s8, 0
140+
; GFX942-NEXT: s_cselect_b32 s8, s15, s9
141+
; GFX942-NEXT: s_cselect_b32 s9, s16, s14
142+
; GFX942-NEXT: s_cmp_lg_u64 vcc, 0
143+
; GFX942-NEXT: s_subb_u32 s5, s7, s5
144+
; GFX942-NEXT: s_cmp_ge_u32 s5, s13
145+
; GFX942-NEXT: v_readfirstlane_b32 s15, v0
146+
; GFX942-NEXT: s_cselect_b32 s14, -1, 0
147+
; GFX942-NEXT: s_cmp_ge_u32 s15, s12
148+
; GFX942-NEXT: s_cselect_b32 s15, -1, 0
149+
; GFX942-NEXT: s_cmp_eq_u32 s5, s13
150+
; GFX942-NEXT: s_cselect_b32 s5, s15, s14
151+
; GFX942-NEXT: s_cmp_lg_u32 s5, 0
152+
; GFX942-NEXT: s_cselect_b32 s9, s9, s3
153+
; GFX942-NEXT: s_cselect_b32 s8, s8, s1
155154
; GFX942-NEXT: s_cbranch_execnz .LBB0_3
156155
; GFX942-NEXT: .LBB0_2:
157156
; GFX942-NEXT: v_cvt_f32_u32_e32 v0, s12
158157
; GFX942-NEXT: s_sub_i32 s1, 0, s12
158+
; GFX942-NEXT: s_mov_b32 s9, 0
159159
; GFX942-NEXT: v_rcp_iflag_f32_e32 v0, v0
160160
; GFX942-NEXT: s_nop 0
161161
; GFX942-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
162162
; GFX942-NEXT: v_cvt_u32_f32_e32 v0, v0
163-
; GFX942-NEXT: v_mul_lo_u32 v1, s1, v0
164-
; GFX942-NEXT: v_mul_hi_u32 v1, v0, v1
165-
; GFX942-NEXT: v_add_u32_e32 v0, v0, v1
166-
; GFX942-NEXT: v_mul_hi_u32 v0, s6, v0
167-
; GFX942-NEXT: v_mul_lo_u32 v2, v0, s12
168-
; GFX942-NEXT: v_sub_u32_e32 v2, s6, v2
169-
; GFX942-NEXT: v_add_u32_e32 v1, 1, v0
170-
; GFX942-NEXT: v_subrev_u32_e32 v3, s12, v2
171-
; GFX942-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
172-
; GFX942-NEXT: s_nop 1
173-
; GFX942-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
174-
; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
175-
; GFX942-NEXT: v_add_u32_e32 v1, 1, v0
176-
; GFX942-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
177-
; GFX942-NEXT: s_nop 1
178-
; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
179-
; GFX942-NEXT: v_mov_b32_e32 v1, 0
163+
; GFX942-NEXT: s_nop 0
164+
; GFX942-NEXT: v_readfirstlane_b32 s3, v0
165+
; GFX942-NEXT: s_mul_i32 s1, s1, s3
166+
; GFX942-NEXT: s_mul_hi_u32 s1, s3, s1
167+
; GFX942-NEXT: s_add_i32 s3, s3, s1
168+
; GFX942-NEXT: s_mul_hi_u32 s1, s6, s3
169+
; GFX942-NEXT: s_mul_i32 s5, s1, s12
170+
; GFX942-NEXT: s_sub_i32 s5, s6, s5
171+
; GFX942-NEXT: s_add_i32 s3, s1, 1
172+
; GFX942-NEXT: s_sub_i32 s8, s5, s12
173+
; GFX942-NEXT: s_cmp_ge_u32 s5, s12
174+
; GFX942-NEXT: s_cselect_b32 s1, s3, s1
175+
; GFX942-NEXT: s_cselect_b32 s5, s8, s5
176+
; GFX942-NEXT: s_add_i32 s3, s1, 1
177+
; GFX942-NEXT: s_cmp_ge_u32 s5, s12
178+
; GFX942-NEXT: s_cselect_b32 s8, s3, s1
180179
; GFX942-NEXT: .LBB0_3:
181-
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 15
182180
; GFX942-NEXT: s_ashr_i32 s1, s0, 31
183-
; GFX942-NEXT: v_and_b32_e32 v0, -16, v0
184-
; GFX942-NEXT: v_mul_lo_u32 v2, v0, s1
185-
; GFX942-NEXT: v_mul_hi_u32 v3, v0, s0
186-
; GFX942-NEXT: v_add_u32_e32 v2, v3, v2
187-
; GFX942-NEXT: v_mul_lo_u32 v1, v1, s0
188-
; GFX942-NEXT: v_add_u32_e32 v1, v2, v1
189-
; GFX942-NEXT: v_mul_lo_u32 v2, v0, s0
190-
; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], v1
181+
; GFX942-NEXT: s_add_u32 s3, s8, 15
182+
; GFX942-NEXT: s_addc_u32 s5, s9, 0
183+
; GFX942-NEXT: s_and_b32 s3, s3, -16
184+
; GFX942-NEXT: s_mul_i32 s1, s3, s1
185+
; GFX942-NEXT: s_mul_hi_u32 s8, s3, s0
186+
; GFX942-NEXT: s_add_i32 s1, s8, s1
187+
; GFX942-NEXT: s_mul_i32 s5, s5, s0
188+
; GFX942-NEXT: s_add_i32 s1, s1, s5
189+
; GFX942-NEXT: s_mul_i32 s3, s3, s0
190+
; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], s1
191191
; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
192-
; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], v2
192+
; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s3
193193
; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
194194
; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s7
195195
; GFX942-NEXT: v_ldexp_f64 v[2:3], v[2:3], 32
@@ -215,7 +215,7 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg noundef
215215
; GFX942-NEXT: .LBB0_4:
216216
; GFX942-NEXT: .Ltmp4:
217217
; GFX942-NEXT: ;DEBUG_VALUE: test:var <- [DW_OP_LLVM_poisoned] $sgpr2_sgpr3
218-
; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1
218+
; GFX942-NEXT: ; implicit-def: $sgpr8_sgpr9
219219
; GFX942-NEXT: s_branch .LBB0_2
220220
; GFX942-NEXT: .Ltmp5:
221221
entry:

0 commit comments

Comments
 (0)