Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 1 addition & 118 deletions llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2762,122 +2762,6 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
ret double %result
}

; f64 llvm.sqrt called with only the nsz call-site flag, inside a function that
; uses attribute group #2 ("approx-func-fp-math"="true").
; The autogenerated checks below (GFX6/GFX8, SelectionDAG and GlobalISel) all
; expect the same shape of code
;   - inputs compared below the constant built from s_brev_b32/v_bfrev_b32 of 8
;     are pre-scaled with v_ldexp_f64 by 0x100 (otherwise by 0),
;   - v_rsq_f64 produces the seed, followed by a chain of v_mul_f64/v_fma_f64
;     steps,
;   - the result is scaled back with v_ldexp_f64 by 0xffffff80 (or 0),
;   - a final v_cmp_class_f64 with mask 0x260 selects the (pre-scaled) input
;     itself instead of the computed value.
; NOTE(review) mask 0x260 presumably covers +/-0 and +inf; confirm against the
; ISA description of v_cmp_class.
define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
; GFX6-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nsz double @llvm.sqrt.f64(double %x)
ret double %result
}

define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
; GFX6-SDAG-LABEL: v_sqrt_f64__enough_unsafe_attrs:
; GFX6-SDAG: ; %bb.0:
Expand Down Expand Up @@ -3580,8 +3464,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32) #1

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nounwind willreturn memory(none) }
attributes #2 = { "approx-func-fp-math"="true" }
attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
attributes #3 = { "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
attributes #4 = { "unsafe-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX6: {{.*}}
Expand Down
98 changes: 0 additions & 98 deletions llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1583,104 +1583,6 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
ret float %result
}

; f32 llvm.exp2 with no call-site fast-math flags, inside a function whose only
; attribute is "approx-func-fp-math"="true".
; The SI, VI and GFX900 checks (SelectionDAG and GlobalISel) expect the
; range-handling sequence rather than a bare v_exp_f32
;   - inputs below 0xc2fc0000 (-126.0) get 0x42800000 (64.0) added first,
;   - v_exp_f32 is applied,
;   - the result is rescaled with v_ldexp_f32 by -64 (v_not_b32 of 63) when the
;     input was adjusted, and by 0 otherwise.
; The R600 and CM checks contain only CF_END and PAD.
define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
; SI-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_not_b32_e32 v1, 63
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; VI-SDAG-NEXT: v_not_b32_e32 v1, 63
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_approx_fn_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_exp2_f32_approx_fn_attr:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
%result = call float @llvm.exp2.f32(float %in)
ret float %result
}

define float @v_exp2_f32_ninf(float %in) {
; SI-SDAG-LABEL: v_exp2_f32_ninf:
; SI-SDAG: ; %bb.0:
Expand Down
123 changes: 0 additions & 123 deletions llvm/test/CodeGen/AMDGPU/llvm.log2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2030,129 +2030,6 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
ret float %result
}

; f32 llvm.log2 with no call-site fast-math flags, inside a function whose only
; attribute is "approx-func-fp-math"="true".
; The SI, VI, GFX900 and GFX1100 checks (SelectionDAG and GlobalISel) expect
; the input-scaling sequence rather than a bare v_log_f32
;   - inputs below 0x800000 (the smallest normal f32) are scaled up with
;     v_ldexp_f32 by 32 (GlobalISel builds the 32 as 1 << 5),
;   - v_log_f32 is applied,
;   - 0x42000000 (32.0) is subtracted from the result when the input was
;     scaled, and 0 otherwise.
; The R600 and CM checks contain only CF_END and PAD.
define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
; SI-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_approx_fn_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_approx_fn_attr:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
%result = call float @llvm.log2.f32(float %in)
ret float %result
}

define float @v_log2_f32_ninf(float %in) {
; SI-SDAG-LABEL: v_log2_f32_ninf:
; SI-SDAG: ; %bb.0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ entry:
ret void
}

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none) "approx-func-fp-math"="false" "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none) "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/DirectX/ShaderFlags/disable-opt-cs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ entry:
ret i32 0
}

attributes #0 = { convergent noinline norecurse optnone "approx-func-fp-math"="true" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { alwaysinline convergent mustprogress norecurse nounwind "approx-func-fp-math"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #0 = { convergent noinline norecurse optnone "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/DirectX/ShaderFlags/disable-opt-lib.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ entry:
ret i32 0
}

attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "approx-func-fp-math"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { convergent noinline norecurse optnone "approx-func-fp-math"="true" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { convergent noinline norecurse optnone "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
Loading
Loading