@@ -1520,15 +1520,18 @@ def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64", F64RT, F64RT, int_nvvm_sqrt_rz_
15201520def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64", F64RT, F64RT, int_nvvm_sqrt_rm_d>;
15211521def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64", F64RT, F64RT, int_nvvm_sqrt_rp_d>;
15221522
1523+ def fsqrt_approx : PatFrags<(ops node:$a), // matches either sqrt form listed below
1524+ [(fsqrt node:$a),
1525+ (int_nvvm_sqrt_f node:$a)], [{
1526+ return !usePrecSqrtF32(N); // match only when usePrecSqrtF32 reports false for this node
1527+ }]>;
1528+
15231529// nvvm_sqrt intrinsic
1524- def : Pat<(int_nvvm_sqrt_f f32:$a),
1525- (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1526- def : Pat<(int_nvvm_sqrt_f f32:$a),
1527- (INT_NVVM_SQRT_RN_F $a)>, Requires<[do_SQRTF32_RN]>;
1528- def : Pat<(int_nvvm_sqrt_f f32:$a),
1529- (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
1530- def : Pat<(int_nvvm_sqrt_f f32:$a),
1531- (INT_NVVM_SQRT_APPROX_F $a)>;
1530+ def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ]>;
1531+ def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_F $a)>;
1532+ // Approximate forms via fsqrt_approx. NOTE(review): int_nvvm_sqrt_f is also matched by the patterns above -- confirm these are tried first.
1533+ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
1534+ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
15321535
15331536//
15341537// Rsqrt
@@ -1551,20 +1554,14 @@ def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
15511554def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
15521555 (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
15531556 Requires<[doRsqrtOpt]>;
1554- // same for int_nvvm_sqrt_f when non-precision sqrt is requested
1555- def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_f f32:$a)),
1556- (INT_NVVM_RSQRT_APPROX_F $a)>,
1557- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1558- def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_f f32:$a)),
1559- (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
1560- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
15611557
1562- def: Pat<(fdiv f32imm_1, (fsqrt f32:$a)),
1558+ // same for fsqrt and int_nvvm_sqrt_f (both via fsqrt_approx) when non-precision sqrt is requested
1559+ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
15631560 (INT_NVVM_RSQRT_APPROX_F $a)>,
1564- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1565- def: Pat<(fdiv f32imm_1, (fsqrt f32:$a)),
1561+ Requires<[doRsqrtOpt, doNoF32FTZ]>;
1562+ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
15661563 (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
1567- Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
1564+ Requires<[doRsqrtOpt, doF32FTZ]>;
15681565//
15691566// Add
15701567//
0 commit comments