@@ -2762,122 +2762,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
2762
2762
ret double %result
2763
2763
}
2764
2764
2765
- define double @v_sqrt_f64__approx_func_fp_math (double %x ) #2 {
2766
- ; GFX6-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
2767
- ; GFX6-SDAG: ; %bb.0:
2768
- ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2769
- ; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
2770
- ; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
2771
- ; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
2772
- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
2773
- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2774
- ; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2775
- ; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2776
- ; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
2777
- ; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
2778
- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
2779
- ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2780
- ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2781
- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2782
- ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
2783
- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2784
- ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
2785
- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
2786
- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
2787
- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2788
- ; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2789
- ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2790
- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2791
- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2792
- ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
2793
- ;
2794
- ; GFX8-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
2795
- ; GFX8-SDAG: ; %bb.0:
2796
- ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2797
- ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
2798
- ; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
2799
- ; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
2800
- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
2801
- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2802
- ; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2803
- ; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2804
- ; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
2805
- ; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
2806
- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
2807
- ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2808
- ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2809
- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2810
- ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
2811
- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2812
- ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
2813
- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
2814
- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
2815
- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2816
- ; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2817
- ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2818
- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2819
- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2820
- ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2821
- ;
2822
- ; GFX6-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
2823
- ; GFX6-GISEL: ; %bb.0:
2824
- ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2825
- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
2826
- ; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
2827
- ; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
2828
- ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
2829
- ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
2830
- ; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2831
- ; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2832
- ; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
2833
- ; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
2834
- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
2835
- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2836
- ; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2837
- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2838
- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2839
- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2840
- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2841
- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
2842
- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
2843
- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2844
- ; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2845
- ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2846
- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2847
- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2848
- ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
2849
- ;
2850
- ; GFX8-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
2851
- ; GFX8-GISEL: ; %bb.0:
2852
- ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2853
- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
2854
- ; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
2855
- ; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
2856
- ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
2857
- ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
2858
- ; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2859
- ; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2860
- ; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
2861
- ; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
2862
- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
2863
- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2864
- ; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2865
- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2866
- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2867
- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2868
- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2869
- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
2870
- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
2871
- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2872
- ; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2873
- ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2874
- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2875
- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2876
- ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2877
- %result = call nsz double @llvm.sqrt.f64 (double %x )
2878
- ret double %result
2879
- }
2880
-
2881
2765
define double @v_sqrt_f64__enough_unsafe_attrs (double %x ) #3 {
2882
2766
; GFX6-SDAG-LABEL: v_sqrt_f64__enough_unsafe_attrs:
2883
2767
; GFX6-SDAG: ; %bb.0:
@@ -3580,8 +3464,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32) #1
3580
3464
3581
3465
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
3582
3466
attributes #1 = { convergent nounwind willreturn memory(none) }
3583
- attributes #2 = { "approx-func-fp-math" ="true" }
3584
- attributes #3 = { "approx-func-fp-math" ="true" "no-nans-fp-math" ="true" "no-infs-fp-math" ="true" }
3467
+ attributes #3 = { "no-nans-fp-math" ="true" "no-infs-fp-math" ="true" }
3585
3468
attributes #4 = { "unsafe-fp-math" ="true" }
3586
3469
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
3587
3470
; GFX6: {{.*}}
0 commit comments