Skip to content

Commit 9820f4a

Browse files
committed
revert fabs changes and put in separate PR
1 parent dfe3c1f commit 9820f4a

File tree

10 files changed

+395
-299
lines changed

10 files changed

+395
-299
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5569,7 +5569,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
55695569
case ISD::BUILD_VECTOR:
55705570
case ISD::BUILD_PAIR:
55715571
case ISD::SPLAT_VECTOR:
5572-
case ISD::FABS:
55735572
return false;
55745573

55755574
case ISD::ABS:

llvm/test/CodeGen/AMDGPU/fmaximum3.ll

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3272,9 +3272,10 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
32723272
; GFX9-LABEL: v_fmaximum3_f64_fabs0:
32733273
; GFX9: ; %bb.0:
32743274
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275-
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3]
3275+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3276+
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
32763277
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3277-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
3278+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
32783279
; GFX9-NEXT: s_nop 1
32793280
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
32803281
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3306,9 +3307,10 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
33063307
; GFX9-LABEL: v_fmaximum3_f64_fabs1:
33073308
; GFX9: ; %bb.0:
33083309
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3309-
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]|
3310+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3311+
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
33103312
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3311-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
3313+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
33123314
; GFX9-NEXT: s_nop 1
33133315
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33143316
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3343,11 +3345,12 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
33433345
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
33443346
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
33453347
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3346-
; GFX9-NEXT: s_nop 1
3348+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3349+
; GFX9-NEXT: s_nop 0
33473350
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33483351
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3349-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
3350-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3352+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
3353+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
33513354
; GFX9-NEXT: s_nop 1
33523355
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33533356
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3374,14 +3377,17 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
33743377
; GFX9-LABEL: v_fmaximum3_f64_fabs_all:
33753378
; GFX9: ; %bb.0:
33763379
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3377-
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
3380+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3381+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3382+
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
33783383
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3379-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
3380-
; GFX9-NEXT: s_nop 1
3384+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3385+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3386+
; GFX9-NEXT: s_nop 0
33813387
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33823388
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3383-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
3384-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3389+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
3390+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
33853391
; GFX9-NEXT: s_nop 1
33863392
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33873393
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3446,14 +3452,17 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
34463452
; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all:
34473453
; GFX9: ; %bb.0:
34483454
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3449-
; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
3455+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3456+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3457+
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3]
34503458
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3451-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
3452-
; GFX9-NEXT: s_nop 1
3459+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
3460+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3461+
; GFX9-NEXT: s_nop 0
34533462
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
34543463
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3455-
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]|
3456-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
3464+
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
3465+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
34573466
; GFX9-NEXT: s_nop 1
34583467
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
34593468
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc

llvm/test/CodeGen/AMDGPU/fminimum3.ll

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3272,9 +3272,10 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
32723272
; GFX9-LABEL: v_fminimum3_f64_fabs0:
32733273
; GFX9: ; %bb.0:
32743274
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275-
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, v[2:3]
3275+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3276+
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
32763277
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3277-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
3278+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
32783279
; GFX9-NEXT: s_nop 1
32793280
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
32803281
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3306,9 +3307,10 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
33063307
; GFX9-LABEL: v_fminimum3_f64_fabs1:
33073308
; GFX9: ; %bb.0:
33083309
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3309-
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], |v[2:3]|
3310+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3311+
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
33103312
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3311-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
3313+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
33123314
; GFX9-NEXT: s_nop 1
33133315
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33143316
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
@@ -3343,11 +3345,12 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
33433345
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
33443346
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
33453347
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3346-
; GFX9-NEXT: s_nop 1
3348+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3349+
; GFX9-NEXT: s_nop 0
33473350
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33483351
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3349-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
3350-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3352+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
3353+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
33513354
; GFX9-NEXT: s_nop 1
33523355
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33533356
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3374,14 +3377,17 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
33743377
; GFX9-LABEL: v_fminimum3_f64_fabs_all:
33753378
; GFX9: ; %bb.0:
33763379
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3377-
; GFX9-NEXT: v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
3380+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3381+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3382+
; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
33783383
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3379-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
3380-
; GFX9-NEXT: s_nop 1
3384+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3385+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3386+
; GFX9-NEXT: s_nop 0
33813387
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
33823388
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3383-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]|
3384-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3389+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
3390+
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
33853391
; GFX9-NEXT: s_nop 1
33863392
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
33873393
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
@@ -3446,14 +3452,17 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
34463452
; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all:
34473453
; GFX9: ; %bb.0:
34483454
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3449-
; GFX9-NEXT: v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
3455+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
3456+
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
3457+
; GFX9-NEXT: v_min_f64 v[6:7], -v[0:1], -v[2:3]
34503458
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
3451-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
3452-
; GFX9-NEXT: s_nop 1
3459+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
3460+
; GFX9-NEXT: v_and_b32_e32 v5, 0x7fffffff, v5
3461+
; GFX9-NEXT: s_nop 0
34533462
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
34543463
; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
3455-
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -|v[4:5]|
3456-
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
3464+
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5]
3465+
; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
34573466
; GFX9-NEXT: s_nop 1
34583467
; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
34593468
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc

llvm/test/CodeGen/AMDGPU/fnearbyint.ll

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,9 @@ define amdgpu_kernel void @nearbyint_f64(ptr addrspace(1) %out, double %in) {
223223
; SI-NEXT: v_bfi_b32 v1, s8, v1, v6
224224
; SI-NEXT: v_mov_b32_e32 v7, s2
225225
; SI-NEXT: v_add_f64 v[4:5], s[2:3], v[0:1]
226+
; SI-NEXT: s_bitset0_b32 s3, 31
226227
; SI-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
227-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[2:3]|, v[2:3]
228+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[2:3]
228229
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
229230
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
230231
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -284,14 +285,16 @@ define amdgpu_kernel void @nearbyint_v2f64(ptr addrspace(1) %out, <2 x double> %
284285
; SI-NEXT: v_mov_b32_e32 v9, s5
285286
; SI-NEXT: v_mov_b32_e32 v10, s4
286287
; SI-NEXT: v_add_f64 v[2:3], s[6:7], v[0:1]
288+
; SI-NEXT: s_bitset0_b32 s7, 31
287289
; SI-NEXT: v_add_f64 v[2:3], v[2:3], -v[0:1]
288290
; SI-NEXT: v_bfi_b32 v1, s10, v6, v9
289-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[6:7]|, v[4:5]
291+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[4:5]
290292
; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
291293
; SI-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
292294
; SI-NEXT: v_add_f64 v[6:7], s[4:5], v[0:1]
295+
; SI-NEXT: s_bitset0_b32 s5, 31
293296
; SI-NEXT: v_add_f64 v[0:1], v[6:7], -v[0:1]
294-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[4:5]|, v[4:5]
297+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[4:5]
295298
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
296299
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
297300
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
@@ -365,26 +368,30 @@ define amdgpu_kernel void @nearbyint_v4f64(ptr addrspace(1) %out, <4 x double> %
365368
; SI-NEXT: v_mov_b32_e32 v14, s5
366369
; SI-NEXT: v_mov_b32_e32 v15, s4
367370
; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[4:5]
371+
; SI-NEXT: s_bitset0_b32 s3, 31
368372
; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
369373
; SI-NEXT: v_bfi_b32 v5, s14, v10, v7
370-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[2:3]|, v[8:9]
374+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[2:3], v[8:9]
371375
; SI-NEXT: v_cndmask_b32_e32 v3, v1, v2, vcc
372376
; SI-NEXT: v_cndmask_b32_e32 v2, v0, v6, vcc
373377
; SI-NEXT: v_add_f64 v[0:1], s[0:1], v[4:5]
378+
; SI-NEXT: s_bitset0_b32 s1, 31
374379
; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
375380
; SI-NEXT: v_bfi_b32 v5, s14, v10, v12
376-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[0:1]|, v[8:9]
381+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[0:1], v[8:9]
377382
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
378383
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
379384
; SI-NEXT: v_add_f64 v[6:7], s[6:7], v[4:5]
385+
; SI-NEXT: s_bitset0_b32 s7, 31
380386
; SI-NEXT: v_add_f64 v[6:7], v[6:7], -v[4:5]
381387
; SI-NEXT: v_bfi_b32 v5, s14, v10, v14
382-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[6:7]|, v[8:9]
388+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[8:9]
383389
; SI-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc
384390
; SI-NEXT: v_cndmask_b32_e32 v6, v6, v13, vcc
385391
; SI-NEXT: v_add_f64 v[10:11], s[4:5], v[4:5]
392+
; SI-NEXT: s_bitset0_b32 s5, 31
386393
; SI-NEXT: v_add_f64 v[4:5], v[10:11], -v[4:5]
387-
; SI-NEXT: v_cmp_gt_f64_e64 vcc, |s[4:5]|, v[8:9]
394+
; SI-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[8:9]
388395
; SI-NEXT: v_cndmask_b32_e32 v5, v5, v14, vcc
389396
; SI-NEXT: v_cndmask_b32_e32 v4, v4, v15, vcc
390397
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16

llvm/test/CodeGen/AMDGPU/fract-match.ll

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2356,10 +2356,11 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23562356
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
23572357
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
23582358
; GFX6-NEXT: s_mov_b32 s8, 0
2359-
; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
23602359
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
2360+
; GFX6-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2361+
; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
23612362
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
2362-
; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
2363+
; GFX6-NEXT: v_cmp_neq_f64_e32 vcc, s[8:9], v[0:1]
23632364
; GFX6-NEXT: s_mov_b32 s6, 0
23642365
; GFX6-NEXT: s_mov_b32 s7, 0xf000
23652366
; GFX6-NEXT: s_mov_b32 s4, s6
@@ -2374,43 +2375,46 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
23742375
; GFX7: ; %bb.0: ; %entry
23752376
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23762377
; GFX7-NEXT: s_mov_b32 s4, 0
2378+
; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2379+
; GFX7-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2380+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23772381
; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
2378-
; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2379-
; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2380-
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2382+
; GFX7-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
23812383
; GFX7-NEXT: s_mov_b32 s6, 0
23822384
; GFX7-NEXT: s_mov_b32 s7, 0xf000
23832385
; GFX7-NEXT: s_mov_b32 s4, s6
23842386
; GFX7-NEXT: s_mov_b32 s5, s6
2385-
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2386-
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2387-
; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
2387+
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2388+
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2389+
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
23882390
; GFX7-NEXT: s_waitcnt vmcnt(0)
23892391
; GFX7-NEXT: s_setpc_b64 s[30:31]
23902392
;
23912393
; GFX8-LABEL: safe_math_fract_f64:
23922394
; GFX8: ; %bb.0: ; %entry
23932395
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23942396
; GFX8-NEXT: s_mov_b32 s4, 0
2397+
; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2398+
; GFX8-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2399+
; GFX8-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
23952400
; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
2396-
; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2397-
; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2398-
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2399-
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
2400-
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
2401-
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
2401+
; GFX8-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
2402+
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
2403+
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
2404+
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
24022405
; GFX8-NEXT: s_waitcnt vmcnt(0)
24032406
; GFX8-NEXT: s_setpc_b64 s[30:31]
24042407
;
24052408
; GFX11-LABEL: safe_math_fract_f64:
24062409
; GFX11: ; %bb.0: ; %entry
24072410
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2408-
; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2409-
; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2410-
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2411-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
2412-
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2413-
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
2411+
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2412+
; GFX11-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2413+
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2414+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2415+
; GFX11-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2416+
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
2417+
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
24142418
; GFX11-NEXT: s_setpc_b64 s[30:31]
24152419
;
24162420
; GFX12-LABEL: safe_math_fract_f64:
@@ -2420,13 +2424,14 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly capture
24202424
; GFX12-NEXT: s_wait_samplecnt 0x0
24212425
; GFX12-NEXT: s_wait_bvhcnt 0x0
24222426
; GFX12-NEXT: s_wait_kmcnt 0x0
2423-
; GFX12-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
2424-
; GFX12-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2425-
; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
2427+
; GFX12-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
2428+
; GFX12-NEXT: v_fract_f64_e32 v[6:7], v[0:1]
2429+
; GFX12-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
2430+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
2431+
; GFX12-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
2432+
; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off
24262433
; GFX12-NEXT: s_wait_alu 0xfffd
2427-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
2428-
; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2429-
; GFX12-NEXT: global_store_b64 v[2:3], v[6:7], off
2434+
; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7
24302435
; GFX12-NEXT: s_setpc_b64 s[30:31]
24312436
entry:
24322437
%floor = tail call double @llvm.floor.f64(double %x)

0 commit comments

Comments
 (0)