Skip to content

Commit 463769e

Browse files
committed
Set all bits for wave mask
Signed-off-by: John Lu <[email protected]>
1 parent 2474bc9 commit 463769e

File tree

11 files changed: 175 additions and 175 deletions.

11 files changed: 175 additions and 175 deletions.

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5972,7 +5972,7 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
59725972

59735973
unsigned SelOpc =
59745974
Subtarget->isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
5975-
BuildMI(*BB, MI, DL, TII->get(SelOpc), Dest1.getReg()).addImm(1).addImm(0);
5975+
BuildMI(*BB, MI, DL, TII->get(SelOpc), Dest1.getReg()).addImm(-1).addImm(0);
59765976

59775977
MI.eraseFromParent();
59785978
return BB;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 54 additions & 54 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/carryout-selection.ll

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2216,11 +2216,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
22162216
; VI-NEXT: s_sub_i32 s10, s3, s14
22172217
; VI-NEXT: v_readfirstlane_b32 s8, v0
22182218
; VI-NEXT: s_sub_u32 s15, s2, s8
2219-
; VI-NEXT: s_cselect_b64 s[8:9], 1, 0
2219+
; VI-NEXT: s_cselect_b64 s[8:9], -1, 0
22202220
; VI-NEXT: s_cmp_lg_u64 s[8:9], 0
22212221
; VI-NEXT: s_subb_u32 s16, s10, s5
22222222
; VI-NEXT: s_sub_u32 s17, s15, s4
2223-
; VI-NEXT: s_cselect_b64 s[10:11], 1, 0
2223+
; VI-NEXT: s_cselect_b64 s[10:11], -1, 0
22242224
; VI-NEXT: s_cmp_lg_u64 s[10:11], 0
22252225
; VI-NEXT: s_subb_u32 s10, s16, 0
22262226
; VI-NEXT: s_cmp_ge_u32 s10, s5
@@ -2330,7 +2330,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
23302330
; GFX9-NEXT: s_add_u32 s9, s13, s9
23312331
; GFX9-NEXT: s_addc_u32 s13, 0, s14
23322332
; GFX9-NEXT: s_add_u32 s14, s8, s9
2333-
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2333+
; GFX9-NEXT: s_cselect_b64 s[8:9], -1, 0
23342334
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
23352335
; GFX9-NEXT: s_addc_u32 s12, s12, s13
23362336
; GFX9-NEXT: s_mul_i32 s8, s10, s12
@@ -2354,7 +2354,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
23542354
; GFX9-NEXT: s_add_u32 s8, s10, s8
23552355
; GFX9-NEXT: s_addc_u32 s10, 0, s9
23562356
; GFX9-NEXT: s_add_u32 s11, s14, s8
2357-
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2357+
; GFX9-NEXT: s_cselect_b64 s[8:9], -1, 0
23582358
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
23592359
; GFX9-NEXT: s_addc_u32 s8, s12, s10
23602360
; GFX9-NEXT: s_mul_i32 s10, s2, s8
@@ -2379,11 +2379,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
23792379
; GFX9-NEXT: s_sub_i32 s10, s3, s14
23802380
; GFX9-NEXT: s_mul_i32 s8, s6, s12
23812381
; GFX9-NEXT: s_sub_u32 s15, s2, s8
2382-
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2382+
; GFX9-NEXT: s_cselect_b64 s[8:9], -1, 0
23832383
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
23842384
; GFX9-NEXT: s_subb_u32 s16, s10, s7
23852385
; GFX9-NEXT: s_sub_u32 s17, s15, s6
2386-
; GFX9-NEXT: s_cselect_b64 s[10:11], 1, 0
2386+
; GFX9-NEXT: s_cselect_b64 s[10:11], -1, 0
23872387
; GFX9-NEXT: s_cmp_lg_u64 s[10:11], 0
23882388
; GFX9-NEXT: s_subb_u32 s10, s16, 0
23892389
; GFX9-NEXT: s_cmp_ge_u32 s10, s7
@@ -2489,7 +2489,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
24892489
; GFX1010-NEXT: s_add_u32 s11, s12, s11
24902490
; GFX1010-NEXT: s_addc_u32 s12, 0, s13
24912491
; GFX1010-NEXT: s_add_u32 s8, s8, s11
2492-
; GFX1010-NEXT: s_cselect_b32 s11, 1, 0
2492+
; GFX1010-NEXT: s_cselect_b32 s11, -1, 0
24932493
; GFX1010-NEXT: s_mul_hi_u32 s13, s9, s8
24942494
; GFX1010-NEXT: s_cmp_lg_u32 s11, 0
24952495
; GFX1010-NEXT: s_mul_i32 s11, s9, s8
@@ -2513,7 +2513,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
25132513
; GFX1010-NEXT: s_add_u32 s9, s10, s9
25142514
; GFX1010-NEXT: s_addc_u32 s10, 0, s11
25152515
; GFX1010-NEXT: s_add_u32 s8, s8, s9
2516-
; GFX1010-NEXT: s_cselect_b32 s9, 1, 0
2516+
; GFX1010-NEXT: s_cselect_b32 s9, -1, 0
25172517
; GFX1010-NEXT: s_mul_hi_u32 s11, s2, s8
25182518
; GFX1010-NEXT: s_cmp_lg_u32 s9, 0
25192519
; GFX1010-NEXT: s_mul_hi_u32 s9, s3, s8
@@ -2538,11 +2538,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
25382538
; GFX1010-NEXT: s_add_i32 s9, s9, s11
25392539
; GFX1010-NEXT: s_sub_i32 s11, s3, s9
25402540
; GFX1010-NEXT: s_sub_u32 s10, s2, s10
2541-
; GFX1010-NEXT: s_cselect_b32 s12, 1, 0
2541+
; GFX1010-NEXT: s_cselect_b32 s12, -1, 0
25422542
; GFX1010-NEXT: s_cmp_lg_u32 s12, 0
25432543
; GFX1010-NEXT: s_subb_u32 s11, s11, s7
25442544
; GFX1010-NEXT: s_sub_u32 s13, s10, s6
2545-
; GFX1010-NEXT: s_cselect_b32 s14, 1, 0
2545+
; GFX1010-NEXT: s_cselect_b32 s14, -1, 0
25462546
; GFX1010-NEXT: s_cmp_lg_u32 s14, 0
25472547
; GFX1010-NEXT: s_subb_u32 s11, s11, 0
25482548
; GFX1010-NEXT: s_cmp_ge_u32 s11, s7
@@ -2649,7 +2649,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
26492649
; GFX1030W32-NEXT: s_add_u32 s11, s12, s11
26502650
; GFX1030W32-NEXT: s_addc_u32 s12, 0, s13
26512651
; GFX1030W32-NEXT: s_add_u32 s8, s8, s11
2652-
; GFX1030W32-NEXT: s_cselect_b32 s11, 1, 0
2652+
; GFX1030W32-NEXT: s_cselect_b32 s11, -1, 0
26532653
; GFX1030W32-NEXT: s_mul_hi_u32 s13, s9, s8
26542654
; GFX1030W32-NEXT: s_cmp_lg_u32 s11, 0
26552655
; GFX1030W32-NEXT: s_mul_i32 s11, s9, s8
@@ -2673,7 +2673,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
26732673
; GFX1030W32-NEXT: s_add_u32 s9, s10, s9
26742674
; GFX1030W32-NEXT: s_addc_u32 s10, 0, s11
26752675
; GFX1030W32-NEXT: s_add_u32 s8, s8, s9
2676-
; GFX1030W32-NEXT: s_cselect_b32 s9, 1, 0
2676+
; GFX1030W32-NEXT: s_cselect_b32 s9, -1, 0
26772677
; GFX1030W32-NEXT: s_mul_hi_u32 s11, s2, s8
26782678
; GFX1030W32-NEXT: s_cmp_lg_u32 s9, 0
26792679
; GFX1030W32-NEXT: s_mul_hi_u32 s9, s3, s8
@@ -2698,11 +2698,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
26982698
; GFX1030W32-NEXT: s_add_i32 s9, s9, s11
26992699
; GFX1030W32-NEXT: s_sub_i32 s11, s3, s9
27002700
; GFX1030W32-NEXT: s_sub_u32 s10, s2, s10
2701-
; GFX1030W32-NEXT: s_cselect_b32 s12, 1, 0
2701+
; GFX1030W32-NEXT: s_cselect_b32 s12, -1, 0
27022702
; GFX1030W32-NEXT: s_cmp_lg_u32 s12, 0
27032703
; GFX1030W32-NEXT: s_subb_u32 s11, s11, s5
27042704
; GFX1030W32-NEXT: s_sub_u32 s13, s10, s4
2705-
; GFX1030W32-NEXT: s_cselect_b32 s14, 1, 0
2705+
; GFX1030W32-NEXT: s_cselect_b32 s14, -1, 0
27062706
; GFX1030W32-NEXT: s_cmp_lg_u32 s14, 0
27072707
; GFX1030W32-NEXT: s_subb_u32 s11, s11, 0
27082708
; GFX1030W32-NEXT: s_cmp_ge_u32 s11, s5
@@ -2809,7 +2809,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
28092809
; GFX1030W64-NEXT: s_add_u32 s7, s11, s7
28102810
; GFX1030W64-NEXT: s_addc_u32 s11, 0, s12
28112811
; GFX1030W64-NEXT: s_add_u32 s12, s6, s7
2812-
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2812+
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], -1, 0
28132813
; GFX1030W64-NEXT: s_mul_hi_u32 s13, s9, s12
28142814
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
28152815
; GFX1030W64-NEXT: s_mul_i32 s6, s9, s12
@@ -2833,7 +2833,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
28332833
; GFX1030W64-NEXT: s_add_u32 s6, s6, s9
28342834
; GFX1030W64-NEXT: s_addc_u32 s9, 0, s7
28352835
; GFX1030W64-NEXT: s_add_u32 s10, s12, s6
2836-
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2836+
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], -1, 0
28372837
; GFX1030W64-NEXT: s_mul_hi_u32 s11, s2, s10
28382838
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
28392839
; GFX1030W64-NEXT: s_mul_hi_u32 s6, s3, s10
@@ -2858,11 +2858,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
28582858
; GFX1030W64-NEXT: s_mul_i32 s6, s4, s10
28592859
; GFX1030W64-NEXT: s_sub_i32 s8, s3, s12
28602860
; GFX1030W64-NEXT: s_sub_u32 s13, s2, s6
2861-
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2861+
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], -1, 0
28622862
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
28632863
; GFX1030W64-NEXT: s_subb_u32 s14, s8, s5
28642864
; GFX1030W64-NEXT: s_sub_u32 s15, s13, s4
2865-
; GFX1030W64-NEXT: s_cselect_b64 s[8:9], 1, 0
2865+
; GFX1030W64-NEXT: s_cselect_b64 s[8:9], -1, 0
28662866
; GFX1030W64-NEXT: s_cmp_lg_u64 s[8:9], 0
28672867
; GFX1030W64-NEXT: s_subb_u32 s8, s14, 0
28682868
; GFX1030W64-NEXT: s_cmp_ge_u32 s8, s5
@@ -2974,7 +2974,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
29742974
; GFX11-NEXT: s_add_u32 s11, s12, s11
29752975
; GFX11-NEXT: s_addc_u32 s12, 0, s13
29762976
; GFX11-NEXT: s_add_u32 s8, s8, s11
2977-
; GFX11-NEXT: s_cselect_b32 s11, 1, 0
2977+
; GFX11-NEXT: s_cselect_b32 s11, -1, 0
29782978
; GFX11-NEXT: s_mul_hi_u32 s13, s9, s8
29792979
; GFX11-NEXT: s_cmp_lg_u32 s11, 0
29802980
; GFX11-NEXT: s_mul_i32 s11, s9, s8
@@ -2998,7 +2998,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
29982998
; GFX11-NEXT: s_add_u32 s9, s10, s9
29992999
; GFX11-NEXT: s_addc_u32 s10, 0, s11
30003000
; GFX11-NEXT: s_add_u32 s8, s8, s9
3001-
; GFX11-NEXT: s_cselect_b32 s9, 1, 0
3001+
; GFX11-NEXT: s_cselect_b32 s9, -1, 0
30023002
; GFX11-NEXT: s_mul_hi_u32 s11, s2, s8
30033003
; GFX11-NEXT: s_cmp_lg_u32 s9, 0
30043004
; GFX11-NEXT: s_mul_hi_u32 s9, s3, s8
@@ -3024,11 +3024,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
30243024
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
30253025
; GFX11-NEXT: s_sub_i32 s11, s3, s9
30263026
; GFX11-NEXT: s_sub_u32 s10, s2, s10
3027-
; GFX11-NEXT: s_cselect_b32 s12, 1, 0
3027+
; GFX11-NEXT: s_cselect_b32 s12, -1, 0
30283028
; GFX11-NEXT: s_cmp_lg_u32 s12, 0
30293029
; GFX11-NEXT: s_subb_u32 s11, s11, s5
30303030
; GFX11-NEXT: s_sub_u32 s13, s10, s4
3031-
; GFX11-NEXT: s_cselect_b32 s14, 1, 0
3031+
; GFX11-NEXT: s_cselect_b32 s14, -1, 0
30323032
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
30333033
; GFX11-NEXT: s_cmp_lg_u32 s14, 0
30343034
; GFX11-NEXT: s_subb_u32 s11, s11, 0
@@ -3141,7 +3141,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
31413141
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
31423142
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[6:7], s[12:13]
31433143
; GFX1250-NEXT: s_add_co_u32 s8, s8, s12
3144-
; GFX1250-NEXT: s_cselect_b32 s6, 1, 0
3144+
; GFX1250-NEXT: s_cselect_b32 s6, -1, 0
31453145
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
31463146
; GFX1250-NEXT: s_cmp_lg_u32 s6, 0
31473147
; GFX1250-NEXT: s_add_co_ci_u32 s9, s9, s13
@@ -3161,7 +3161,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
31613161
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
31623162
; GFX1250-NEXT: s_add_nc_u64 s[10:11], s[6:7], s[10:11]
31633163
; GFX1250-NEXT: s_add_co_u32 s8, s8, s10
3164-
; GFX1250-NEXT: s_cselect_b32 s10, 1, 0
3164+
; GFX1250-NEXT: s_cselect_b32 s10, -1, 0
31653165
; GFX1250-NEXT: s_mul_hi_u32 s6, s2, s8
31663166
; GFX1250-NEXT: s_cmp_lg_u32 s10, 0
31673167
; GFX1250-NEXT: s_mul_hi_u32 s12, s3, s8
@@ -3183,12 +3183,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
31833183
; GFX1250-NEXT: s_mul_u64 s[8:9], s[4:5], s[10:11]
31843184
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
31853185
; GFX1250-NEXT: s_sub_co_u32 s6, s2, s8
3186-
; GFX1250-NEXT: s_cselect_b32 s8, 1, 0
3186+
; GFX1250-NEXT: s_cselect_b32 s8, -1, 0
31873187
; GFX1250-NEXT: s_sub_co_i32 s12, s3, s9
31883188
; GFX1250-NEXT: s_cmp_lg_u32 s8, 0
31893189
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, s5
31903190
; GFX1250-NEXT: s_sub_co_u32 s13, s6, s4
3191-
; GFX1250-NEXT: s_cselect_b32 s14, 1, 0
3191+
; GFX1250-NEXT: s_cselect_b32 s14, -1, 0
31923192
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
31933193
; GFX1250-NEXT: s_cmp_lg_u32 s14, 0
31943194
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, 0

llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ define i32 @s_add_co_select_user() {
1212
; GFX7-NEXT: s_load_dword s6, s[4:5], 0x0
1313
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1414
; GFX7-NEXT: s_add_u32 s7, s6, s6
15-
; GFX7-NEXT: s_cselect_b64 s[4:5], 1, 0
15+
; GFX7-NEXT: s_cselect_b64 s[4:5], -1, 0
1616
; GFX7-NEXT: s_or_b32 s4, s4, s5
1717
; GFX7-NEXT: s_cmp_lg_u32 s4, 0
1818
; GFX7-NEXT: s_addc_u32 s8, s6, 0
@@ -31,7 +31,7 @@ define i32 @s_add_co_select_user() {
3131
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x0
3232
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
3333
; GFX9-NEXT: s_add_u32 s7, s6, s6
34-
; GFX9-NEXT: s_cselect_b64 s[4:5], 1, 0
34+
; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
3535
; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0
3636
; GFX9-NEXT: s_addc_u32 s8, s6, 0
3737
; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
@@ -49,7 +49,7 @@ define i32 @s_add_co_select_user() {
4949
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
5050
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
5151
; GFX10-NEXT: s_add_u32 s5, s4, s4
52-
; GFX10-NEXT: s_cselect_b32 s6, 1, 0
52+
; GFX10-NEXT: s_cselect_b32 s6, -1, 0
5353
; GFX10-NEXT: s_cmp_lg_u32 s6, 0
5454
; GFX10-NEXT: s_addc_u32 s6, s4, 0
5555
; GFX10-NEXT: s_cselect_b32 s7, -1, 0
@@ -67,7 +67,7 @@ define i32 @s_add_co_select_user() {
6767
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
6868
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6969
; GFX11-NEXT: s_add_u32 s1, s0, s0
70-
; GFX11-NEXT: s_cselect_b32 s2, 1, 0
70+
; GFX11-NEXT: s_cselect_b32 s2, -1, 0
7171
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
7272
; GFX11-NEXT: s_cmp_lg_u32 s2, 0
7373
; GFX11-NEXT: s_addc_u32 s2, s0, 0

llvm/test/CodeGen/AMDGPU/s_uaddo_pseudo.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ define amdgpu_ps i32 @s_uaddo_pseudo(i32 inreg %val0) {
1212
; CHECK-LABEL: s_uaddo_pseudo:
1313
; CHECK: ; %bb.0:
1414
; CHECK-NEXT: s_add_u32 s0, s0, 1
15-
; CHECK-NEXT: s_cselect_b64 s[0:1], 1, 0
15+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
1616
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
1717
; CHECK-NEXT: s_addc_u32 s0, 1, 0
1818
; CHECK-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
5656
; GCN-NEXT: s_addc_u32 s15, 0, s16
5757
; GCN-NEXT: s_add_u32 s16, s0, s1
5858
; GCN-NEXT: v_mov_b32_e32 v0, s16
59-
; GCN-NEXT: s_cselect_b64 s[0:1], 1, 0
59+
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
6060
; GCN-NEXT: v_mul_hi_u32 v0, s12, v0
6161
; GCN-NEXT: s_or_b32 s0, s0, s1
6262
; GCN-NEXT: s_cmp_lg_u32 s0, 0
@@ -88,7 +88,7 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
8888
; GCN-NEXT: s_add_u32 s0, s1, s0
8989
; GCN-NEXT: s_addc_u32 s12, 0, s12
9090
; GCN-NEXT: s_add_u32 s15, s16, s0
91-
; GCN-NEXT: s_cselect_b64 s[0:1], 1, 0
91+
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
9292
; GCN-NEXT: s_or_b32 s0, s0, s1
9393
; GCN-NEXT: s_cmp_lg_u32 s0, 0
9494
; GCN-NEXT: s_addc_u32 s14, s14, s12
@@ -129,12 +129,12 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
129129
; GCN-NEXT: s_sub_i32 s17, s7, s16
130130
; GCN-NEXT: s_mul_i32 s4, s10, s14
131131
; GCN-NEXT: s_sub_u32 s6, s6, s4
132-
; GCN-NEXT: s_cselect_b64 s[4:5], 1, 0
132+
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
133133
; GCN-NEXT: s_or_b32 s18, s4, s5
134134
; GCN-NEXT: s_cmp_lg_u32 s18, 0
135135
; GCN-NEXT: s_subb_u32 s17, s17, s11
136136
; GCN-NEXT: s_sub_u32 s19, s6, s10
137-
; GCN-NEXT: s_cselect_b64 s[4:5], 1, 0
137+
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
138138
; GCN-NEXT: s_or_b32 s4, s4, s5
139139
; GCN-NEXT: s_cmp_lg_u32 s4, 0
140140
; GCN-NEXT: s_subb_u32 s4, s17, 0
@@ -1192,7 +1192,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
11921192
; GCN-NEXT: s_addc_u32 s12, 0, s13
11931193
; GCN-NEXT: s_add_u32 s13, s8, s9
11941194
; GCN-NEXT: v_mov_b32_e32 v0, s13
1195-
; GCN-NEXT: s_cselect_b64 s[8:9], 1, 0
1195+
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
11961196
; GCN-NEXT: v_mul_hi_u32 v0, s2, v0
11971197
; GCN-NEXT: s_or_b32 s8, s8, s9
11981198
; GCN-NEXT: s_cmp_lg_u32 s8, 0
@@ -1224,7 +1224,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
12241224
; GCN-NEXT: s_add_u32 s2, s2, s8
12251225
; GCN-NEXT: s_addc_u32 s10, 0, s9
12261226
; GCN-NEXT: s_add_u32 s2, s13, s2
1227-
; GCN-NEXT: s_cselect_b64 s[8:9], 1, 0
1227+
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
12281228
; GCN-NEXT: s_or_b32 s8, s8, s9
12291229
; GCN-NEXT: s_cmp_lg_u32 s8, 0
12301230
; GCN-NEXT: s_addc_u32 s8, s11, s10
@@ -1244,12 +1244,12 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
12441244
; GCN-NEXT: s_sub_i32 s12, 0, s11
12451245
; GCN-NEXT: s_mul_i32 s8, s6, s10
12461246
; GCN-NEXT: s_sub_u32 s13, 24, s8
1247-
; GCN-NEXT: s_cselect_b64 s[8:9], 1, 0
1247+
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
12481248
; GCN-NEXT: s_or_b32 s14, s8, s9
12491249
; GCN-NEXT: s_cmp_lg_u32 s14, 0
12501250
; GCN-NEXT: s_subb_u32 s12, s12, s7
12511251
; GCN-NEXT: s_sub_u32 s15, s13, s6
1252-
; GCN-NEXT: s_cselect_b64 s[8:9], 1, 0
1252+
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
12531253
; GCN-NEXT: s_or_b32 s8, s8, s9
12541254
; GCN-NEXT: s_cmp_lg_u32 s8, 0
12551255
; GCN-NEXT: s_subb_u32 s8, s12, 0

0 commit comments

Comments
 (0)