Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 61 additions & 81 deletions libm/src/math/generic/ceil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
F::from_bits(ix)
} else {
// |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
if ix & F::SIG_MASK == F::Int::ZERO {
if ix & !F::SIGN_MASK == F::Int::ZERO {
status = Status::OK;
} else {
status = Status::INEXACT;
Expand All @@ -72,103 +72,83 @@ mod tests {
use super::*;
use crate::support::Hexf;

/// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
let roundtrip = [
F::ZERO,
F::ONE,
F::NEG_ONE,
F::NEG_ZERO,
F::INFINITY,
F::NEG_INFINITY,
];

for x in roundtrip {
let FpResult { val, status } = ceil_status(x);
assert_biteq!(val, x, "{}", Hexf(x));
assert_eq!(status, Status::OK, "{}", Hexf(x));
}
macro_rules! cases {
($f:ty) => {
[
// roundtrip
(0.0, 0.0, Status::OK),
(-0.0, -0.0, Status::OK),
(1.0, 1.0, Status::OK),
(-1.0, -1.0, Status::OK),
(<$f>::INFINITY, <$f>::INFINITY, Status::OK),
(<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
// with rounding
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.5, 1.0, Status::INEXACT),
(-0.5, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.5, 2.0, Status::INEXACT),
(-1.5, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
]
};
}

for &(x, res, res_stat) in cases {
#[track_caller]
fn check<F: Float>(cases: &[(F, F, Status)]) {
for &(x, exp_res, exp_stat) in cases {
let FpResult { val, status } = ceil_status(x);
assert_biteq!(val, res, "{}", Hexf(x));
assert_eq!(status, res_stat, "{}", Hexf(x));
assert_biteq!(val, exp_res, "{x:?} {}", Hexf(x));
assert_eq!(
status,
exp_stat,
"{x:?} {} -> {exp_res:?} {}",
Hexf(x),
Hexf(exp_res)
);
}
}

/* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */

#[test]
#[cfg(f16_enabled)]
fn spec_tests_f16() {
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f16>(&cases);
}

#[test]
fn sanity_check_f32() {
assert_eq!(ceil(1.1f32), 2.0);
assert_eq!(ceil(2.9f32), 3.0);
}

#[test]
fn spec_tests_f32() {
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f32>(&cases);
fn check_f16() {
check::<f16>(&cases!(f16));
check::<f16>(&[
(hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
(hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
]);
}

#[test]
fn sanity_check_f64() {
assert_eq!(ceil(1.1f64), 2.0);
assert_eq!(ceil(2.9f64), 3.0);
fn check_f32() {
check::<f32>(&cases!(f32));
check::<f32>(&[
(hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
(hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
]);
}

#[test]
fn spec_tests_f64() {
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f64>(&cases);
fn check_f64() {
check::<f64>(&cases!(f64));
check::<f64>(&[
(hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
(hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
]);
}

#[test]
#[cfg(f128_enabled)]
fn spec_tests_f128() {
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f128>(&cases);
check::<f128>(&cases!(f128));
check::<f128>(&[
(hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
(hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
]);
}
}
143 changes: 66 additions & 77 deletions libm/src/math/generic/floor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
return FpResult::ok(x);
}

let status;
let res = if e >= 0 {
// |x| >= 1.0
let m = F::SIG_MASK >> e.unsigned();
Expand All @@ -35,123 +34,113 @@ pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
return FpResult::ok(x);
}

// Otherwise, raise an inexact exception.
status = Status::INEXACT;

if x.is_sign_negative() {
ix += m;
}

ix &= !m;
F::from_bits(ix)
} else {
// |x| < 1.0, raise an inexact exception since truncation will happen.
if ix & F::SIG_MASK == F::Int::ZERO {
status = Status::OK;
} else {
status = Status::INEXACT;
// |x| < 1.0, zero or inexact with truncation

if (ix & !F::SIGN_MASK) == F::Int::ZERO {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The performance regression didn't make sense to me, so I looked at the assembly and then LLVM-IR, and it seems that LLVM is being "smart" by turning this into a floating point comparison x == 0.0_f16, and that generates the equivalent of x as f32 == 0.0_f32 with a call to __extendhfsf2.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for digging into all of this. I wrote up an issue here llvm/llvm-project#180630, wonder what else might be hurting from it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On x86-64, these contain one or more calls to __extendhfsf2 (f16 as f32):

    fmaximumf16
    fminimumf16
    fmaximum_numf16
    fminimum_numf16
    fmaxf16
    fminf16
    fdimf16
    sqrtf16
    rintf16
    roundf16
    fmodf16
    ldexpf16

The min/max -functions are doing floating point comparisons, but might benefit from inspecting the bits instead. The others do float arithmetic and also contains calls to __truncsfhf2 (f32 as f16). So none of them are explicit bit-operations being turned into float ops.

Most other libcalls are in various f128-functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the benchmarks primarily affect f16 and f128 and that we can't really easily work around LLVM, I'm just going to accept the regressions here.

return FpResult::ok(x);
}

if x.is_sign_positive() {
// 0.0 <= x < 1.0; rounding down goes toward +0.0.
F::ZERO
} else if ix << 1 != zero {
} else {
// -1.0 < x < 0.0; rounding down goes toward -1.0.
F::NEG_ONE
} else {
// -0.0 remains unchanged
x
}
};

FpResult::new(res, status)
FpResult::new(res, Status::INEXACT)
}

#[cfg(test)]
mod tests {
use super::*;
use crate::support::Hexf;

/// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
let roundtrip = [
F::ZERO,
F::ONE,
F::NEG_ONE,
F::NEG_ZERO,
F::INFINITY,
F::NEG_INFINITY,
];

for x in roundtrip {
let FpResult { val, status } = floor_status(x);
assert_biteq!(val, x, "{}", Hexf(x));
assert_eq!(status, Status::OK, "{}", Hexf(x));
}
macro_rules! cases {
($f:ty) => {
[
// roundtrip
(0.0, 0.0, Status::OK),
(-0.0, -0.0, Status::OK),
(1.0, 1.0, Status::OK),
(-1.0, -1.0, Status::OK),
(<$f>::INFINITY, <$f>::INFINITY, Status::OK),
(<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
// with rounding
(0.1, 0.0, Status::INEXACT),
(-0.1, -1.0, Status::INEXACT),
(0.5, 0.0, Status::INEXACT),
(-0.5, -1.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -1.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -2.0, Status::INEXACT),
(1.5, 1.0, Status::INEXACT),
(-1.5, -2.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -2.0, Status::INEXACT),
]
};
}

for &(x, res, res_stat) in cases {
#[track_caller]
fn check<F: Float>(cases: &[(F, F, Status)]) {
for &(x, exp_res, exp_stat) in cases {
let FpResult { val, status } = floor_status(x);
assert_biteq!(val, res, "{}", Hexf(x));
assert_eq!(status, res_stat, "{}", Hexf(x));
assert_biteq!(val, exp_res, "{x:?} {}", Hexf(x));
assert_eq!(
status,
exp_stat,
"{x:?} {} -> {exp_res:?} {}",
Hexf(x),
Hexf(exp_res)
);
}
}

/* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */

#[test]
#[cfg(f16_enabled)]
fn spec_tests_f16() {
let cases = [];
spec_test::<f16>(&cases);
}

#[test]
fn sanity_check_f32() {
assert_eq!(floor(0.5f32), 0.0);
assert_eq!(floor(1.1f32), 1.0);
assert_eq!(floor(2.9f32), 2.0);
}

#[test]
fn spec_tests_f32() {
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -1.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -1.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -2.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -2.0, Status::INEXACT),
];
spec_test::<f32>(&cases);
fn check_f16() {
check::<f16>(&cases!(f16));
check::<f16>(&[
(hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
(hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
]);
}

#[test]
fn sanity_check_f64() {
assert_eq!(floor(1.1f64), 1.0);
assert_eq!(floor(2.9f64), 2.0);
fn check_f32() {
check::<f32>(&cases!(f32));
check::<f32>(&[
(hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
(hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
]);
}

#[test]
fn spec_tests_f64() {
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -1.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -1.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -2.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -2.0, Status::INEXACT),
];
spec_test::<f64>(&cases);
fn check_f64() {
check::<f64>(&cases!(f64));
check::<f64>(&[
(hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
(hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
]);
}

#[test]
#[cfg(f128_enabled)]
fn spec_tests_f128() {
let cases = [];
spec_test::<f128>(&cases);
check::<f128>(&cases!(f128));
check::<f128>(&[
(hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
(hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
]);
}
}
Loading
Loading