@@ -1499,60 +1499,13 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
1499
1499
; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
1500
1500
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
1501
1501
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
1502
- ; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
1503
- ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
1504
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
1505
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
1506
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
1507
- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0
1508
- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
1509
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
1510
- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
1511
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1512
- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1513
- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13
1514
- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1515
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1
1516
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1517
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0
1518
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
1519
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
1520
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1521
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
1522
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
1523
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1524
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
1525
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
1526
- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10
1527
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
1528
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
1529
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
1530
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
1531
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
1532
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1533
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
1534
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
1535
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
1536
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
1537
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
1538
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
1539
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
1540
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1541
- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8
1542
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
1543
- ; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
1544
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
1545
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
1546
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
1547
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f
1548
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5
1549
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
1550
1502
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
1551
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
1503
+ ; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
1552
1504
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
1553
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
1554
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1555
- ; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
1505
+ ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
1506
+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
1507
+ ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1508
+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
1556
1509
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
1557
1510
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
1558
1511
;
@@ -3538,109 +3491,14 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
3538
3491
; GFX1250-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
3539
3492
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
3540
3493
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
3494
+ ; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
3541
3495
; GFX1250-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
3542
- ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
3543
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3
3544
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
3545
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
3546
- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2
3547
- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
3548
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
3549
- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
3550
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3551
- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
3552
- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13
3553
- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3554
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3
3555
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3556
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2
3557
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
3558
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
3559
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3560
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
3561
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
3562
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
3563
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
3564
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
3565
- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s3, 0xfc10
3566
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
3567
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
3568
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
3569
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
3570
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
3571
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3572
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
3573
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
3574
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
3575
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
3576
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
3577
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
3578
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
3579
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3580
- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s5, s5, s8
3581
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
3582
- ; GFX1250-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
3583
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
3584
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
3585
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1
3586
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
3587
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s3, 0x40f
3588
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5
3589
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff
3590
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8
3591
- ; GFX1250-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
3592
- ; GFX1250-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
3593
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe
3594
- ; GFX1250-SDAG-FAKE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5
3595
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
3596
- ; GFX1250-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
3597
- ; GFX1250-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13
3598
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
3599
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3600
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
3601
- ; GFX1250-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
3602
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1
3603
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
3604
- ; GFX1250-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0
3605
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9
3606
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000
3607
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3608
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11
3609
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11
3610
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
3611
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10
3612
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
3613
- ; GFX1250-SDAG-FAKE16-NEXT: s_addk_co_i32 s5, 0xfc10
3614
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3
3615
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12
3616
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10
3617
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1
3618
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10
3619
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3620
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7
3621
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5
3622
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0
3623
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3
3624
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0
3625
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
3626
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11
3627
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3628
- ; GFX1250-SDAG-FAKE16-NEXT: s_add_co_i32 s3, s3, s10
3629
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31
3630
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
3631
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0
3632
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
3633
- ; GFX1250-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s5, 0x40f
3634
3496
; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
3635
- ; GFX1250-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
3636
- ; GFX1250-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16
3637
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3638
- ; GFX1250-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
3639
- ; GFX1250-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3
3640
- ; GFX1250-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
3641
- ; GFX1250-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
3642
- ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3643
- ; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
3497
+ ; GFX1250-SDAG-FAKE16-NEXT: s_wait_loadcnt 0x0
3498
+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
3499
+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
3500
+ ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
3501
+ ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
3644
3502
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
3645
3503
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
3646
3504
;
0 commit comments