@@ -1525,15 +1525,18 @@ def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64", F64RT, F64RT, int_nvvm_sqrt_rz_
15251525def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64", F64RT, F64RT, int_nvvm_sqrt_rm_d>;
15261526def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64", F64RT, F64RT, int_nvvm_sqrt_rp_d>;
15271527
1528+ def fsqrt_approx : PatFrags<(ops node:$a),
1529+ [(fsqrt node:$a),
1530+ (int_nvvm_sqrt_f node:$a)], [{
1531+ return !usePrecSqrtF32(N);
1532+ }]>;
1533+
15281534// nvvm_sqrt intrinsic
1529- def : Pat<(int_nvvm_sqrt_f f32:$a),
1530- (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1531- def : Pat<(int_nvvm_sqrt_f f32:$a),
1532- (INT_NVVM_SQRT_RN_F $a)>, Requires<[do_SQRTF32_RN]>;
1533- def : Pat<(int_nvvm_sqrt_f f32:$a),
1534- (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
1535- def : Pat<(int_nvvm_sqrt_f f32:$a),
1536- (INT_NVVM_SQRT_APPROX_F $a)>;
1535+ def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ]>;
1536+ def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_F $a)>;
1537+
1538+ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
1539+ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
15371540
15381541//
15391542// Rsqrt
@@ -1556,20 +1559,14 @@ def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
15561559def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
15571560 (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
15581561 Requires<[doRsqrtOpt]>;
1559- // same for int_nvvm_sqrt_f when non-precision sqrt is requested
1560- def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_f f32:$a)),
1561- (INT_NVVM_RSQRT_APPROX_F $a)>,
1562- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1563- def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_f f32:$a)),
1564- (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
1565- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
15661562
1567- def: Pat<(fdiv f32imm_1, (fsqrt f32:$a)),
1563+ // same for fsqrt and int_nvvm_sqrt_f when non-precision sqrt is requested
1564+ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
15681565 (INT_NVVM_RSQRT_APPROX_F $a)>,
1569- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1570- def: Pat<(fdiv f32imm_1, (fsqrt f32:$a)),
1566+ Requires<[doRsqrtOpt, doNoF32FTZ]>;
1567+ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
15711568 (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
1572- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
1569+ Requires<[doRsqrtOpt, doF32FTZ]>;
15731570//
15741571// Add
15751572//
0 commit comments