@@ -2215,11 +2215,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2215
2215
; VI-NEXT: s_add_i32 s14, s8, s9
2216
2216
; VI-NEXT: s_sub_i32 s10, s3, s14
2217
2217
; VI-NEXT: v_readfirstlane_b32 s8, v0
2218
- ; VI-NEXT: s_sub_i32 s15, s2, s8
2218
+ ; VI-NEXT: s_sub_u32 s15, s2, s8
2219
2219
; VI-NEXT: s_cselect_b64 s[8:9], 1, 0
2220
2220
; VI-NEXT: s_cmp_lg_u64 s[8:9], 0
2221
2221
; VI-NEXT: s_subb_u32 s16, s10, s5
2222
- ; VI-NEXT: s_sub_i32 s17, s15, s4
2222
+ ; VI-NEXT: s_sub_u32 s17, s15, s4
2223
2223
; VI-NEXT: s_cselect_b64 s[10:11], 1, 0
2224
2224
; VI-NEXT: s_cmp_lg_u64 s[10:11], 0
2225
2225
; VI-NEXT: s_subb_u32 s10, s16, 0
@@ -2329,7 +2329,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2329
2329
; GFX9-NEXT: s_mul_i32 s9, s12, s9
2330
2330
; GFX9-NEXT: s_add_u32 s9, s13, s9
2331
2331
; GFX9-NEXT: s_addc_u32 s13, 0, s14
2332
- ; GFX9-NEXT: s_add_i32 s14, s8, s9
2332
+ ; GFX9-NEXT: s_add_u32 s14, s8, s9
2333
2333
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2334
2334
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
2335
2335
; GFX9-NEXT: s_addc_u32 s12, s12, s13
@@ -2353,21 +2353,21 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2353
2353
; GFX9-NEXT: s_mul_i32 s8, s12, s8
2354
2354
; GFX9-NEXT: s_add_u32 s8, s10, s8
2355
2355
; GFX9-NEXT: s_addc_u32 s10, 0, s9
2356
- ; GFX9-NEXT: s_add_i32 s14 , s14, s8
2356
+ ; GFX9-NEXT: s_add_u32 s11 , s14, s8
2357
2357
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2358
2358
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
2359
2359
; GFX9-NEXT: s_addc_u32 s8, s12, s10
2360
2360
; GFX9-NEXT: s_mul_i32 s10, s2, s8
2361
- ; GFX9-NEXT: s_mul_hi_u32 s11 , s2, s14
2361
+ ; GFX9-NEXT: s_mul_hi_u32 s12 , s2, s11
2362
2362
; GFX9-NEXT: s_mul_hi_u32 s9, s2, s8
2363
- ; GFX9-NEXT: s_add_u32 s10, s11 , s10
2363
+ ; GFX9-NEXT: s_add_u32 s10, s12 , s10
2364
2364
; GFX9-NEXT: s_addc_u32 s9, 0, s9
2365
- ; GFX9-NEXT: s_mul_i32 s13, s3, s14
2366
- ; GFX9-NEXT: s_mul_hi_u32 s12 , s3, s14
2367
- ; GFX9-NEXT: s_add_u32 s10, s10, s13
2368
- ; GFX9-NEXT: s_mul_hi_u32 s11 , s3, s8
2369
- ; GFX9-NEXT: s_addc_u32 s9, s9, s12
2370
- ; GFX9-NEXT: s_addc_u32 s10, s11 , 0
2365
+ ; GFX9-NEXT: s_mul_hi_u32 s13, s3, s11
2366
+ ; GFX9-NEXT: s_mul_i32 s11 , s3, s11
2367
+ ; GFX9-NEXT: s_add_u32 s10, s10, s11
2368
+ ; GFX9-NEXT: s_mul_hi_u32 s12 , s3, s8
2369
+ ; GFX9-NEXT: s_addc_u32 s9, s9, s13
2370
+ ; GFX9-NEXT: s_addc_u32 s10, s12 , 0
2371
2371
; GFX9-NEXT: s_mul_i32 s8, s3, s8
2372
2372
; GFX9-NEXT: s_add_u32 s12, s9, s8
2373
2373
; GFX9-NEXT: s_addc_u32 s13, 0, s10
@@ -2378,11 +2378,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2378
2378
; GFX9-NEXT: s_add_i32 s14, s8, s9
2379
2379
; GFX9-NEXT: s_sub_i32 s10, s3, s14
2380
2380
; GFX9-NEXT: s_mul_i32 s8, s6, s12
2381
- ; GFX9-NEXT: s_sub_i32 s15, s2, s8
2381
+ ; GFX9-NEXT: s_sub_u32 s15, s2, s8
2382
2382
; GFX9-NEXT: s_cselect_b64 s[8:9], 1, 0
2383
2383
; GFX9-NEXT: s_cmp_lg_u64 s[8:9], 0
2384
2384
; GFX9-NEXT: s_subb_u32 s16, s10, s7
2385
- ; GFX9-NEXT: s_sub_i32 s17, s15, s6
2385
+ ; GFX9-NEXT: s_sub_u32 s17, s15, s6
2386
2386
; GFX9-NEXT: s_cselect_b64 s[10:11], 1, 0
2387
2387
; GFX9-NEXT: s_cmp_lg_u64 s[10:11], 0
2388
2388
; GFX9-NEXT: s_subb_u32 s10, s16, 0
@@ -2488,7 +2488,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2488
2488
; GFX1010-NEXT: s_addc_u32 s13, s17, 0
2489
2489
; GFX1010-NEXT: s_add_u32 s11, s12, s11
2490
2490
; GFX1010-NEXT: s_addc_u32 s12, 0, s13
2491
- ; GFX1010-NEXT: s_add_i32 s8, s8, s11
2491
+ ; GFX1010-NEXT: s_add_u32 s8, s8, s11
2492
2492
; GFX1010-NEXT: s_cselect_b32 s11, 1, 0
2493
2493
; GFX1010-NEXT: s_mul_hi_u32 s13, s9, s8
2494
2494
; GFX1010-NEXT: s_cmp_lg_u32 s11, 0
@@ -2512,7 +2512,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2512
2512
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
2513
2513
; GFX1010-NEXT: s_add_u32 s9, s10, s9
2514
2514
; GFX1010-NEXT: s_addc_u32 s10, 0, s11
2515
- ; GFX1010-NEXT: s_add_i32 s8, s8, s9
2515
+ ; GFX1010-NEXT: s_add_u32 s8, s8, s9
2516
2516
; GFX1010-NEXT: s_cselect_b32 s9, 1, 0
2517
2517
; GFX1010-NEXT: s_mul_hi_u32 s11, s2, s8
2518
2518
; GFX1010-NEXT: s_cmp_lg_u32 s9, 0
@@ -2537,11 +2537,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2537
2537
; GFX1010-NEXT: s_mul_i32 s10, s6, s5
2538
2538
; GFX1010-NEXT: s_add_i32 s9, s9, s11
2539
2539
; GFX1010-NEXT: s_sub_i32 s11, s3, s9
2540
- ; GFX1010-NEXT: s_sub_i32 s10, s2, s10
2540
+ ; GFX1010-NEXT: s_sub_u32 s10, s2, s10
2541
2541
; GFX1010-NEXT: s_cselect_b32 s12, 1, 0
2542
2542
; GFX1010-NEXT: s_cmp_lg_u32 s12, 0
2543
2543
; GFX1010-NEXT: s_subb_u32 s11, s11, s7
2544
- ; GFX1010-NEXT: s_sub_i32 s13, s10, s6
2544
+ ; GFX1010-NEXT: s_sub_u32 s13, s10, s6
2545
2545
; GFX1010-NEXT: s_cselect_b32 s14, 1, 0
2546
2546
; GFX1010-NEXT: s_cmp_lg_u32 s14, 0
2547
2547
; GFX1010-NEXT: s_subb_u32 s11, s11, 0
@@ -2648,7 +2648,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2648
2648
; GFX1030W32-NEXT: s_addc_u32 s13, s17, 0
2649
2649
; GFX1030W32-NEXT: s_add_u32 s11, s12, s11
2650
2650
; GFX1030W32-NEXT: s_addc_u32 s12, 0, s13
2651
- ; GFX1030W32-NEXT: s_add_i32 s8, s8, s11
2651
+ ; GFX1030W32-NEXT: s_add_u32 s8, s8, s11
2652
2652
; GFX1030W32-NEXT: s_cselect_b32 s11, 1, 0
2653
2653
; GFX1030W32-NEXT: s_mul_hi_u32 s13, s9, s8
2654
2654
; GFX1030W32-NEXT: s_cmp_lg_u32 s11, 0
@@ -2672,7 +2672,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2672
2672
; GFX1030W32-NEXT: s_addc_u32 s11, s11, 0
2673
2673
; GFX1030W32-NEXT: s_add_u32 s9, s10, s9
2674
2674
; GFX1030W32-NEXT: s_addc_u32 s10, 0, s11
2675
- ; GFX1030W32-NEXT: s_add_i32 s8, s8, s9
2675
+ ; GFX1030W32-NEXT: s_add_u32 s8, s8, s9
2676
2676
; GFX1030W32-NEXT: s_cselect_b32 s9, 1, 0
2677
2677
; GFX1030W32-NEXT: s_mul_hi_u32 s11, s2, s8
2678
2678
; GFX1030W32-NEXT: s_cmp_lg_u32 s9, 0
@@ -2697,11 +2697,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2697
2697
; GFX1030W32-NEXT: s_mul_i32 s10, s4, s7
2698
2698
; GFX1030W32-NEXT: s_add_i32 s9, s9, s11
2699
2699
; GFX1030W32-NEXT: s_sub_i32 s11, s3, s9
2700
- ; GFX1030W32-NEXT: s_sub_i32 s10, s2, s10
2700
+ ; GFX1030W32-NEXT: s_sub_u32 s10, s2, s10
2701
2701
; GFX1030W32-NEXT: s_cselect_b32 s12, 1, 0
2702
2702
; GFX1030W32-NEXT: s_cmp_lg_u32 s12, 0
2703
2703
; GFX1030W32-NEXT: s_subb_u32 s11, s11, s5
2704
- ; GFX1030W32-NEXT: s_sub_i32 s13, s10, s4
2704
+ ; GFX1030W32-NEXT: s_sub_u32 s13, s10, s4
2705
2705
; GFX1030W32-NEXT: s_cselect_b32 s14, 1, 0
2706
2706
; GFX1030W32-NEXT: s_cmp_lg_u32 s14, 0
2707
2707
; GFX1030W32-NEXT: s_subb_u32 s11, s11, 0
@@ -2808,7 +2808,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2808
2808
; GFX1030W64-NEXT: s_addc_u32 s12, s16, 0
2809
2809
; GFX1030W64-NEXT: s_add_u32 s7, s11, s7
2810
2810
; GFX1030W64-NEXT: s_addc_u32 s11, 0, s12
2811
- ; GFX1030W64-NEXT: s_add_i32 s12, s6, s7
2811
+ ; GFX1030W64-NEXT: s_add_u32 s12, s6, s7
2812
2812
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2813
2813
; GFX1030W64-NEXT: s_mul_hi_u32 s13, s9, s12
2814
2814
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
@@ -2832,16 +2832,16 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2832
2832
; GFX1030W64-NEXT: s_addc_u32 s7, s10, 0
2833
2833
; GFX1030W64-NEXT: s_add_u32 s6, s6, s9
2834
2834
; GFX1030W64-NEXT: s_addc_u32 s9, 0, s7
2835
- ; GFX1030W64-NEXT: s_add_i32 s12 , s12, s6
2835
+ ; GFX1030W64-NEXT: s_add_u32 s10 , s12, s6
2836
2836
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2837
- ; GFX1030W64-NEXT: s_mul_hi_u32 s10 , s2, s12
2837
+ ; GFX1030W64-NEXT: s_mul_hi_u32 s11 , s2, s10
2838
2838
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
2839
- ; GFX1030W64-NEXT: s_mul_hi_u32 s6, s3, s12
2839
+ ; GFX1030W64-NEXT: s_mul_hi_u32 s6, s3, s10
2840
2840
; GFX1030W64-NEXT: s_addc_u32 s7, s8, s9
2841
- ; GFX1030W64-NEXT: s_mul_i32 s8, s3, s12
2842
- ; GFX1030W64-NEXT: s_mul_i32 s11 , s2, s7
2841
+ ; GFX1030W64-NEXT: s_mul_i32 s8, s3, s10
2842
+ ; GFX1030W64-NEXT: s_mul_i32 s10 , s2, s7
2843
2843
; GFX1030W64-NEXT: s_mul_hi_u32 s9, s2, s7
2844
- ; GFX1030W64-NEXT: s_add_u32 s10, s10, s11
2844
+ ; GFX1030W64-NEXT: s_add_u32 s10, s11, s10
2845
2845
; GFX1030W64-NEXT: s_addc_u32 s9, 0, s9
2846
2846
; GFX1030W64-NEXT: s_mul_hi_u32 s12, s3, s7
2847
2847
; GFX1030W64-NEXT: s_add_u32 s8, s10, s8
@@ -2857,11 +2857,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2857
2857
; GFX1030W64-NEXT: s_add_i32 s12, s6, s8
2858
2858
; GFX1030W64-NEXT: s_mul_i32 s6, s4, s10
2859
2859
; GFX1030W64-NEXT: s_sub_i32 s8, s3, s12
2860
- ; GFX1030W64-NEXT: s_sub_i32 s13, s2, s6
2860
+ ; GFX1030W64-NEXT: s_sub_u32 s13, s2, s6
2861
2861
; GFX1030W64-NEXT: s_cselect_b64 s[6:7], 1, 0
2862
2862
; GFX1030W64-NEXT: s_cmp_lg_u64 s[6:7], 0
2863
2863
; GFX1030W64-NEXT: s_subb_u32 s14, s8, s5
2864
- ; GFX1030W64-NEXT: s_sub_i32 s15, s13, s4
2864
+ ; GFX1030W64-NEXT: s_sub_u32 s15, s13, s4
2865
2865
; GFX1030W64-NEXT: s_cselect_b64 s[8:9], 1, 0
2866
2866
; GFX1030W64-NEXT: s_cmp_lg_u64 s[8:9], 0
2867
2867
; GFX1030W64-NEXT: s_subb_u32 s8, s14, 0
@@ -2973,7 +2973,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2973
2973
; GFX11-NEXT: s_addc_u32 s13, s17, 0
2974
2974
; GFX11-NEXT: s_add_u32 s11, s12, s11
2975
2975
; GFX11-NEXT: s_addc_u32 s12, 0, s13
2976
- ; GFX11-NEXT: s_add_i32 s8, s8, s11
2976
+ ; GFX11-NEXT: s_add_u32 s8, s8, s11
2977
2977
; GFX11-NEXT: s_cselect_b32 s11, 1, 0
2978
2978
; GFX11-NEXT: s_mul_hi_u32 s13, s9, s8
2979
2979
; GFX11-NEXT: s_cmp_lg_u32 s11, 0
@@ -2997,7 +2997,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
2997
2997
; GFX11-NEXT: s_addc_u32 s11, s11, 0
2998
2998
; GFX11-NEXT: s_add_u32 s9, s10, s9
2999
2999
; GFX11-NEXT: s_addc_u32 s10, 0, s11
3000
- ; GFX11-NEXT: s_add_i32 s8, s8, s9
3000
+ ; GFX11-NEXT: s_add_u32 s8, s8, s9
3001
3001
; GFX11-NEXT: s_cselect_b32 s9, 1, 0
3002
3002
; GFX11-NEXT: s_mul_hi_u32 s11, s2, s8
3003
3003
; GFX11-NEXT: s_cmp_lg_u32 s9, 0
@@ -3023,11 +3023,11 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
3023
3023
; GFX11-NEXT: s_add_i32 s9, s9, s11
3024
3024
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
3025
3025
; GFX11-NEXT: s_sub_i32 s11, s3, s9
3026
- ; GFX11-NEXT: s_sub_i32 s10, s2, s10
3026
+ ; GFX11-NEXT: s_sub_u32 s10, s2, s10
3027
3027
; GFX11-NEXT: s_cselect_b32 s12, 1, 0
3028
3028
; GFX11-NEXT: s_cmp_lg_u32 s12, 0
3029
3029
; GFX11-NEXT: s_subb_u32 s11, s11, s5
3030
- ; GFX11-NEXT: s_sub_i32 s13, s10, s4
3030
+ ; GFX11-NEXT: s_sub_u32 s13, s10, s4
3031
3031
; GFX11-NEXT: s_cselect_b32 s14, 1, 0
3032
3032
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3033
3033
; GFX11-NEXT: s_cmp_lg_u32 s14, 0
@@ -3105,7 +3105,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
3105
3105
; GFX1250-NEXT: s_wait_kmcnt 0x0
3106
3106
; GFX1250-NEXT: s_or_b64 s[6:7], s[2:3], s[4:5]
3107
3107
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3108
- ; GFX1250-NEXT: s_and_b64 s[6:7], s[6:7], lit64( 0xffffffff00000000)
3108
+ ; GFX1250-NEXT: s_and_b64 s[6:7], s[6:7], 0xffffffff00000000
3109
3109
; GFX1250-NEXT: s_cmp_lg_u64 s[6:7], 0
3110
3110
; GFX1250-NEXT: s_cbranch_scc0 .LBB16_4
3111
3111
; GFX1250-NEXT: ; %bb.1:
@@ -3140,7 +3140,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
3140
3140
; GFX1250-NEXT: s_add_co_ci_u32 s13, s18, 0
3141
3141
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3142
3142
; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[6:7], s[12:13]
3143
- ; GFX1250-NEXT: s_add_co_i32 s8, s8, s12
3143
+ ; GFX1250-NEXT: s_add_co_u32 s8, s8, s12
3144
3144
; GFX1250-NEXT: s_cselect_b32 s6, 1, 0
3145
3145
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3146
3146
; GFX1250-NEXT: s_cmp_lg_u32 s6, 0
@@ -3160,7 +3160,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
3160
3160
; GFX1250-NEXT: s_add_co_ci_u32 s11, s16, 0
3161
3161
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3162
3162
; GFX1250-NEXT: s_add_nc_u64 s[10:11], s[6:7], s[10:11]
3163
- ; GFX1250-NEXT: s_add_co_i32 s8, s8, s10
3163
+ ; GFX1250-NEXT: s_add_co_u32 s8, s8, s10
3164
3164
; GFX1250-NEXT: s_cselect_b32 s10, 1, 0
3165
3165
; GFX1250-NEXT: s_mul_hi_u32 s6, s2, s8
3166
3166
; GFX1250-NEXT: s_cmp_lg_u32 s10, 0
@@ -3177,17 +3177,17 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
3177
3177
; GFX1250-NEXT: s_add_co_ci_u32 s11, s13, 0
3178
3178
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3179
3179
; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[6:7], s[10:11]
3180
- ; GFX1250-NEXT: s_and_b64 s[10:11], s[8:9], lit64( 0xffffffff00000000)
3180
+ ; GFX1250-NEXT: s_and_b64 s[10:11], s[8:9], 0xffffffff00000000
3181
3181
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3182
3182
; GFX1250-NEXT: s_or_b32 s10, s10, s8
3183
3183
; GFX1250-NEXT: s_mul_u64 s[8:9], s[4:5], s[10:11]
3184
3184
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3185
- ; GFX1250-NEXT: s_sub_co_i32 s6, s2, s8
3185
+ ; GFX1250-NEXT: s_sub_co_u32 s6, s2, s8
3186
3186
; GFX1250-NEXT: s_cselect_b32 s8, 1, 0
3187
3187
; GFX1250-NEXT: s_sub_co_i32 s12, s3, s9
3188
3188
; GFX1250-NEXT: s_cmp_lg_u32 s8, 0
3189
3189
; GFX1250-NEXT: s_sub_co_ci_u32 s12, s12, s5
3190
- ; GFX1250-NEXT: s_sub_co_i32 s13, s6, s4
3190
+ ; GFX1250-NEXT: s_sub_co_u32 s13, s6, s4
3191
3191
; GFX1250-NEXT: s_cselect_b32 s14, 1, 0
3192
3192
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3193
3193
; GFX1250-NEXT: s_cmp_lg_u32 s14, 0
0 commit comments