Skip to content

Commit 7c4aef4

Browse files
committed
Migrate to a generic rem_pio2
1 parent 667ba28 commit 7c4aef4

File tree

7 files changed

+284
-163
lines changed

7 files changed

+284
-163
lines changed

libm-test/src/f8_impl.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ impl Float for f8 {
2424
const ZERO: Self = Self(0b0_0000_000);
2525
const NEG_ZERO: Self = Self(0b1_0000_000);
2626
const ONE: Self = Self(0b0_0111_000);
27+
const TWO: Self = Self(0b0_1000_000);
28+
const THREE: Self = Self(0b0_1000_100);
29+
const FOUR: Self = Self(0b0_1001_000);
2730
const NEG_ONE: Self = Self(0b1_0111_000);
2831
const MAX: Self = Self(0b0_1110_111);
2932
const MIN: Self = Self(0b1_1110_111);

libm/src/math/generic/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ mod fmin;
1313
mod fminimum;
1414
mod fminimum_num;
1515
mod fmod;
16+
mod rem_frac_pi_2;
1617
mod rint;
1718
mod round;
1819
mod scalbn;
@@ -31,6 +32,7 @@ pub use fmin::fmin;
3132
pub use fminimum::fminimum;
3233
pub use fminimum_num::fminimum_num;
3334
pub use fmod::fmod;
35+
pub(crate) use rem_frac_pi_2::{medium, rem_frac_pi_2};
3436
pub use rint::rint_round;
3537
pub use round::round;
3638
pub use scalbn::scalbn;
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
#![allow(unused)]
2+
3+
use core::f64::consts;
4+
5+
use crate::support::{CastFrom, CastInto, DInt, Float, HInt, HalfRep, Int, MinInt};
6+
7+
pub(crate) trait RemFracPi2Support: Float<Int: DInt<H: Int>> {
8+
const TO_INT: Self;
9+
const INV_PIO2: Self;
10+
const PIO2_1: Self;
11+
const PIO2_1T: Self;
12+
const PIO2_2: Self;
13+
const PIO2_2T: Self;
14+
const PIO2_3: Self;
15+
const PIO2_3T: Self;
16+
17+
const FRAC_5PI_4_HI: HalfRep<Self>;
18+
const FRAC_3PI_4_HI: HalfRep<Self>;
19+
const FRAC_9PI_4_HI: HalfRep<Self>;
20+
const FRAC_7PI_4_HI: HalfRep<Self>;
21+
const FRAC_3PI_2_HI: HalfRep<Self>;
22+
/// 2pi
23+
const TAU_HI: HalfRep<Self>;
24+
const FRAC_PI_2_HI: HalfRep<Self>;
25+
/// (2^20)(pi/2)
26+
const FRAC_2_POW_20_PI_2: HalfRep<Self>;
27+
28+
const MAGIC: u32 = 23;
29+
const MAGIC_F: Self;
30+
31+
fn large(x: &[Self], y: &mut [Self], e0: i32, prec: usize) -> i32;
32+
}
33+
34+
const EPS: f64 = 2.2204460492503131e-16;
35+
36+
impl RemFracPi2Support for f64 {
37+
const TO_INT: Self = 1.5 / EPS;
38+
const INV_PIO2: Self = 6.36619772367581382433e-01;
39+
const PIO2_1: Self = 1.57079632673412561417e+00;
40+
const PIO2_1T: Self = 6.07710050650619224932e-11;
41+
const PIO2_2: Self = 6.07710050630396597660e-11;
42+
const PIO2_2T: Self = 2.02226624879595063154e-21;
43+
const PIO2_3: Self = 2.02226624871116645580e-21;
44+
const PIO2_3T: Self = 8.47842766036889956997e-32;
45+
const FRAC_5PI_4_HI: HalfRep<Self> = 0x400f6a7a;
46+
const FRAC_3PI_4_HI: HalfRep<Self> = 0x4002d97c;
47+
const FRAC_9PI_4_HI: HalfRep<Self> = 0x401c463b;
48+
const FRAC_7PI_4_HI: HalfRep<Self> = 0x4015fdbc;
49+
const FRAC_3PI_2_HI: HalfRep<Self> = 0x4012d97c;
50+
const TAU_HI: HalfRep<Self> = 0x401921fb;
51+
const FRAC_PI_2_HI: HalfRep<Self> = 0x921fb;
52+
const FRAC_2_POW_20_PI_2: HalfRep<Self> = 0x413921fb;
53+
54+
const MAGIC_F: Self = hf64!("0x1p24");
55+
56+
fn large(x: &[Self], y: &mut [Self], e0: i32, prec: usize) -> i32 {
57+
super::super::rem_pio2_large(x, y, e0, prec)
58+
}
59+
}
60+
61+
pub(crate) fn rem_frac_pi_2<F>(x: F) -> (i32, F, F)
62+
where
63+
F: RemFracPi2Support,
64+
F: CastInto<i32>,
65+
HalfRep<F>: Int + MinInt<Unsigned: Int<OtherSign: Int>>,
66+
// <HalfRep<F> as Int>::Unsigned: Int,
67+
{
68+
// let sign = x.is_sign_positive()
69+
70+
let ix: HalfRep<F> = x.abs().to_bits().hi();
71+
let pos = x.is_sign_positive();
72+
73+
if ix <= F::FRAC_5PI_4_HI {
74+
/* |x| ~<= 5pi/4 */
75+
if (ix & F::SIG_MASK.hi()) == F::FRAC_PI_2_HI {
76+
/* |x| ~= pi/2 or 2pi/2 */
77+
return medium(x, ix); /* cancellation -- use medium case */
78+
}
79+
80+
if ix <= F::FRAC_3PI_4_HI {
81+
/* |x| ~<= 3pi/4 */
82+
if pos {
83+
let z = x - F::PIO2_1; /* one round good to 85 bits */
84+
let y0 = z - F::PIO2_1T;
85+
let y1 = (z - y0) - F::PIO2_1T;
86+
return (1, y0, y1);
87+
} else {
88+
let z = x + F::PIO2_1;
89+
let y0 = z + F::PIO2_1T;
90+
let y1 = (z - y0) + F::PIO2_1T;
91+
return (-1, y0, y1);
92+
}
93+
} else if pos {
94+
let z = x - F::TWO * F::PIO2_1;
95+
let y0 = z - F::TWO * F::PIO2_1T;
96+
let y1 = (z - y0) - F::TWO * F::PIO2_1T;
97+
return (2, y0, y1);
98+
} else {
99+
let z = x + F::TWO * F::PIO2_1;
100+
let y0 = z + F::TWO * F::PIO2_1T;
101+
let y1 = (z - y0) + F::TWO * F::PIO2_1T;
102+
return (-2, y0, y1);
103+
}
104+
}
105+
106+
if ix <= F::FRAC_9PI_4_HI {
107+
/* |x| ~<= 9pi/4 */
108+
if ix <= F::FRAC_7PI_4_HI {
109+
/* |x| ~<= 7pi/4 */
110+
if ix == F::FRAC_3PI_2_HI {
111+
/* |x| ~= 3pi/2 */
112+
return medium(x, ix);
113+
}
114+
115+
if pos {
116+
let z = x - F::THREE * F::PIO2_1;
117+
let y0 = z - F::THREE * F::PIO2_1T;
118+
let y1 = (z - y0) - F::THREE * F::PIO2_1T;
119+
return (3, y0, y1);
120+
} else {
121+
let z = x + F::THREE * F::PIO2_1;
122+
let y0 = z + F::THREE * F::PIO2_1T;
123+
let y1 = (z - y0) + F::THREE * F::PIO2_1T;
124+
return (-3, y0, y1);
125+
}
126+
} else {
127+
if ix == F::TAU_HI {
128+
/* |x| ~= 4pi/2 */
129+
return medium(x, ix);
130+
}
131+
132+
if pos {
133+
let z = x - F::FOUR * F::PIO2_1;
134+
let y0 = z - F::FOUR * F::PIO2_1T;
135+
let y1 = (z - y0) - F::FOUR * F::PIO2_1T;
136+
return (4, y0, y1);
137+
} else {
138+
let z = x + F::FOUR * F::PIO2_1;
139+
let y0 = z + F::FOUR * F::PIO2_1T;
140+
let y1 = (z - y0) + F::FOUR * F::PIO2_1T;
141+
return (-4, y0, y1);
142+
}
143+
}
144+
}
145+
146+
if ix < F::FRAC_2_POW_20_PI_2 {
147+
/* |x| ~< 2^20*(pi/2), medium size */
148+
return medium(x, ix);
149+
}
150+
/*
151+
152+
* all other (large) arguments
153+
*/
154+
if ix >= F::EXP_MASK.hi() {
155+
/* x is inf or NaN */
156+
let y0 = x - x;
157+
let y1 = y0;
158+
return (0, y0, y1);
159+
}
160+
161+
/* set z = scalbn(|x|,-ilogb(x)+23) */
162+
let mut ui = x.to_bits();
163+
ui &= F::SIG_MASK;
164+
// ui |= F::Int::cast_from((F::EXP_BIAS + F::MAGIC) << F::SIG_BITS);
165+
ui |= F::Int::cast_from(F::EXP_BIAS + F::MAGIC) << F::SIG_BITS;
166+
167+
let mut z = F::from_bits(ui);
168+
let mut tx = [F::ZERO; 3];
169+
170+
for i in 0..2 {
171+
// ??
172+
i!(tx, i, =, super::trunc(z));
173+
z = (z - i!(tx, i)) * F::MAGIC_F;
174+
}
175+
176+
i!(tx,2, =, z);
177+
178+
/* skip zero terms, first term is non-zero */
179+
let mut i = 2;
180+
while i != 0 && i!(tx, i) == F::ZERO {
181+
i -= 1;
182+
}
183+
184+
let mut ty = [F::ZERO; 3];
185+
186+
let ex: i32 = (ix >> (HalfRep::<F>::BITS - F::EXP_BITS - 1)).cast();
187+
let n = F::large(&tx[..=i], &mut ty, ex - (F::EXP_BIAS + F::MAGIC) as i32, 1);
188+
189+
if !pos {
190+
return (-n, -i!(ty, 0), -i!(ty, 1));
191+
} else {
192+
(n, i!(ty, 0), i!(ty, 1))
193+
}
194+
}
195+
196+
pub fn medium<F>(x: F, ix: HalfRep<F>) -> (i32, F, F)
197+
where
198+
F: RemFracPi2Support,
199+
F: CastInto<i32>,
200+
HalfRep<F>: Int,
201+
{
202+
/* rint(x/(pi/2)), Assume round-to-nearest. */
203+
let tmp = x * F::INV_PIO2 + F::TO_INT;
204+
// force rounding of tmp to its storage format on x87 to avoid
205+
// excess precision issues.
206+
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
207+
let tmp = force_eval!(tmp);
208+
let f_n = tmp - F::TO_INT;
209+
let n: i32 = f_n.cast();
210+
let mut r = x - f_n * F::PIO2_1;
211+
let mut w = f_n * F::PIO2_1T; /* 1st round, good to 85 bits */
212+
let mut y0 = r - w;
213+
let ui = y0.to_bits();
214+
let ey = y0.ex().signed();
215+
let ex: i32 = (ix >> (HalfRep::<F>::BITS - F::EXP_BITS - 1)).cast();
216+
217+
// (ix >> 20) as i32;
218+
if ex - ey > 16 {
219+
/* 2nd round, good to 118 bits */
220+
let t = r;
221+
w = f_n * F::PIO2_2;
222+
r = t - w;
223+
w = f_n * F::PIO2_2T - ((t - r) - w);
224+
y0 = r - w;
225+
let ey = y0.ex().signed();
226+
if ex - ey > 49 {
227+
/* 3rd round, good to 151 bits, covers all cases */
228+
let t = r;
229+
w = f_n * F::PIO2_3;
230+
r = t - w;
231+
w = f_n * F::PIO2_3T - ((t - r) - w);
232+
y0 = r - w;
233+
}
234+
}
235+
let y1 = (r - y0) - w;
236+
(n, y0, y1)
237+
}

0 commit comments

Comments
 (0)