@@ -2762,122 +2762,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
27622762 ret double %result
27632763}
27642764
2765- define double @v_sqrt_f64__approx_func_fp_math (double %x ) #2 {
2766- ; GFX6-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
2767- ; GFX6-SDAG: ; %bb.0:
2768- ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2769- ; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
2770- ; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
2771- ; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
2772- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
2773- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2774- ; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2775- ; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2776- ; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
2777- ; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
2778- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
2779- ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2780- ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2781- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2782- ; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
2783- ; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2784- ; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
2785- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
2786- ; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
2787- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2788- ; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2789- ; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2790- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2791- ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2792- ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
2793- ;
2794- ; GFX8-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
2795- ; GFX8-SDAG: ; %bb.0:
2796- ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2797- ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
2798- ; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
2799- ; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
2800- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
2801- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2802- ; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2803- ; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2804- ; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
2805- ; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
2806- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
2807- ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2808- ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2809- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2810- ; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
2811- ; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
2812- ; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
2813- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
2814- ; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
2815- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2816- ; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2817- ; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2818- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2819- ; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2820- ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2821- ;
2822- ; GFX6-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
2823- ; GFX6-GISEL: ; %bb.0:
2824- ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2825- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
2826- ; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
2827- ; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
2828- ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
2829- ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
2830- ; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2831- ; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2832- ; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
2833- ; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
2834- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
2835- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2836- ; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2837- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2838- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2839- ; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2840- ; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2841- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
2842- ; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
2843- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2844- ; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2845- ; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2846- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2847- ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2848- ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
2849- ;
2850- ; GFX8-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
2851- ; GFX8-GISEL: ; %bb.0:
2852- ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2853- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
2854- ; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
2855- ; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
2856- ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
2857- ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
2858- ; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
2859- ; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
2860- ; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
2861- ; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
2862- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
2863- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
2864- ; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
2865- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2866- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2867- ; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
2868- ; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
2869- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
2870- ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
2871- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
2872- ; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
2873- ; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
2874- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2875- ; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2876- ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2877- %result = call nsz double @llvm.sqrt.f64 (double %x )
2878- ret double %result
2879- }
2880-
28812765define double @v_sqrt_f64__enough_unsafe_attrs (double %x ) #3 {
28822766; GFX6-SDAG-LABEL: v_sqrt_f64__enough_unsafe_attrs:
28832767; GFX6-SDAG: ; %bb.0:
@@ -3580,8 +3464,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32) #1
35803464
35813465attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
35823466attributes #1 = { convergent nounwind willreturn memory(none) }
3583- attributes #2 = { "approx-func-fp-math" ="true" }
3584- attributes #3 = { "approx-func-fp-math" ="true" "no-nans-fp-math" ="true" "no-infs-fp-math" ="true" }
3467+ attributes #3 = { "no-nans-fp-math" ="true" "no-infs-fp-math" ="true" }
35853468attributes #4 = { "unsafe-fp-math" ="true" }
35863469;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
35873470; GFX6: {{.*}}
0 commit comments