Skip to content

Commit f04acc2

Browse files
committed
Fixed underflow behaviour for ldexpf
1 parent ca86e17 commit f04acc2

File tree

2 files changed

+134
-75
lines changed

2 files changed

+134
-75
lines changed

src/libc/ldexpf.src

Lines changed: 93 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ __ldexpf_avoid_negative_zero := 1
2828
private __ldexpf_helper
2929
__ldexpf_helper:
3030
.maybe_subnormal:
31-
or a, a
31+
; A = zero, carry = signbit
32+
rra ; restore signbit and clear carry
3233
adc hl, bc ; BC is zero
3334
.ret_self:
3435
ld hl, (iy + 3) ; mant
@@ -37,10 +38,9 @@ __ldexpf_helper:
3738
; .subnormal_input:
3839
; BC is -1 here
3940
bit 7, (iy + 11) ; scale sign
40-
jr nz, .move_subnormal_down
41-
; .move_subnormal_up:
42-
ld a, e ; signbit
4341
ld de, (iy + 9) ; scale
42+
jr nz, _ldexpf.move_subnormal_down
43+
; .move_subnormal_up:
4444
.norm_loop:
4545
add hl, hl
4646
jr c, .normalized
@@ -50,42 +50,16 @@ __ldexpf_helper:
5050
jr c, .norm_loop
5151
; .still_subnormal:
5252
; DE is -1 here
53-
inc e ; ld e, 0
53+
; saves 8F for this path at a cost of 3 bytes:
54+
if 0
55+
inc de ; ld e, 0
5456
jr _ldexpf.finish_subnormal
55-
.normalized:
56-
inc de
57-
ex de, hl
58-
jr _ldexpf.scale_up
59-
60-
.move_subnormal_down:
61-
; BC is -1 here
62-
; first we need to test that the result won't be zero
63-
call __ictlz
64-
; A is [1, 23]
65-
; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
66-
sub a, 24 ; A is [-23, -1]
67-
ld c, a ; sign extend A
68-
ld hl, (iy + 9) ; scale
69-
ld a, l
70-
or a, a
71-
sbc hl, bc
72-
cpl
73-
jr nc, _ldexpf.shru_common
74-
.underflow_to_zero:
75-
xor a, a
76-
ld b, a ; ld b, 0
77-
if __ldexpf_avoid_negative_zero
78-
res 7, (iy + 6)
7957
end if
80-
.overflow_to_inf: ; <-- Carry is set when inf/NaN
81-
ld hl, 5 ; ERANGE
82-
ld (_errno), hl
83-
ld l, h ; ld l, 0
58+
.normalized:
59+
inc de ; don't touch the Z flag
8460
ex de, hl
85-
jr nc, _ldexpf.underflow_hijack
86-
ld de, $800000
87-
ld b, $7F
88-
jr _ldexpf.overflow_hijack
61+
; Z is set here
62+
jr _ldexpf.scale_up_subnormal
8963

9064
;-------------------------------------------------------------------------------
9165
; When the input and output are normal:
@@ -108,15 +82,21 @@ _ldexpf:
10882
inc a
10983
jr z, __ldexpf_helper.ret_self ; inf NaN
11084
ld a, e ; signbit
85+
ld de, (iy + 9) ; scale
11186
ex de, hl
112-
ld hl, (iy + 9) ; scale
11387
add hl, bc ; add expon
11488
bit 7, (iy + 11) ; scale sign
11589
jr nz, .scale_down
11690
.scale_up:
117-
ld bc, -255 ; $FFFF01
91+
; HL is [1, $8000FD]
92+
ld c, b ; ld bc, 0
93+
dec bc ; ld bc, -1
94+
.scale_up_subnormal: ; <-- HL is [0, $7FFFFE]
95+
inc c ; ld bc, $FFFF00
96+
inc c ; ld bc, $FFFF01 ; BC is -255 ; sets NZ
97+
; ld bc, -255
11898
add hl, bc
119-
jr c, __ldexpf_helper.overflow_to_inf
99+
jr c, .overflow_to_inf
120100
; sbc hl, bc ; restore hl
121101
dec l ; we only care about the low 8 bits
122102
ex de, hl
@@ -131,6 +111,51 @@ _ldexpf:
131111
rr l
132112
ret
133113

114+
;-------------------------------------------------------------------------------
115+
116+
.move_subnormal_down:
117+
; DE = scale
118+
; BC is -1 here
119+
; first we need to test that the result won't be zero
120+
call __ictlz
121+
ex de, hl ; HL = scale
122+
; A is [1, 23]
123+
; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
124+
add a, -24 ; A is [-23, -1] and carry is cleared
125+
ld c, a ; sign extend A
126+
ld a, l
127+
sbc hl, bc
128+
cpl
129+
jr nc, _ldexpf.shru_common
130+
; .underflow:
131+
inc b ; ld b, 0 ; sets Z
132+
.underflow_to_zero: ; <-- Z is set when underflowing to zero
133+
.overflow_to_inf: ; <-- NZ is set when infinite
134+
.raise_erange_avoid_negative_zero:
135+
ld hl, $800000
136+
jr nz, .overflow
137+
add hl, hl ; ld hl, 0
138+
if __ldexpf_avoid_negative_zero
139+
; prevents negative zero from being emitted on underflow
140+
res 7, (iy + 6)
141+
end if
142+
.overflow:
143+
ex de, hl
144+
.raise_erange:
145+
ld hl, 5 ; ERANGE
146+
ld (_errno), hl
147+
.raise_inexact:
148+
ld hl, ___fe_cur_env
149+
set 5, (hl) ; FE_INEXACT
150+
.result_is_exact:
151+
ld a, (iy + 6) ; expon
152+
rla ; extract signbit
153+
ex de, hl
154+
; B is $FF if infinite and $00 otherwise
155+
rr b
156+
ld e, b
157+
ret
158+
134159
;-------------------------------------------------------------------------------
135160
.scale_down:
136161
push de ; mant <<= 1
@@ -141,13 +166,13 @@ _ldexpf:
141166
jr nc, .finish ; expon > 0
142167
;-------------------------------------------------------------------------------
143168
.shru_to_subnormal:
169+
; Z is set here
144170
xor a, a
145-
sub a, e
146-
pop de
147171
ld c, 48 ; ld bc, 24 << 1
148172
add hl, bc
149-
jr nc, __ldexpf_helper.underflow_to_zero
150-
173+
pop hl ; reset SP
174+
jr nc, .underflow_to_zero
175+
sub a, e
151176
set 7, (iy + 5) ; set implicit mantissa bit
152177
.shru_common:
153178
; A should be [0, 23]
@@ -157,43 +182,45 @@ _ldexpf:
157182
xor a, a
158183
inc b
159184
; shift amount will be [1, 24]
160-
ld c, a ; ld c, 0
161-
ld d, (iy - 1)
185+
ld d, a ; ld d, 0
186+
ld c, (iy - 1)
162187
.shru_loop:
163-
adc a, c ; collect sticky bits
164-
srl d
188+
adc a, d ; collect sticky bits
189+
srl c
165190
rr h
166191
rr l
167192
djnz .shru_loop
168-
ld (iy - 1), d
193+
ld (iy - 1), c
169194
pop de
170195
ld d, h
171196
ld e, l
172197

173198
; round upwards to even if (round && (guard || sticky))
174199
jr nc, .no_round
175-
; be careful not to touch the carry flag
176-
inc a
177-
dec a
200+
; we must ensure that FE_INEXACT is raised since rounding has occured
201+
or a, a
178202
jr nz, .round_up
179-
bit 0, e ; test guard bit
180-
jr z, .no_round
203+
inc a ; ld a, 1
204+
and a, e ; test guard bit
205+
jr z, .no_round_inexact
181206
.round_up:
182207
inc de ; round upwards to even (wont overflow)
183208
.no_round:
184-
adc a, a
209+
adc a, a ; test the sticky and round bits
185210
jr z, .result_is_exact
186-
.underflow_hijack:
187-
.overflow_hijack:
188-
ld hl, ___fe_cur_env
189-
set 5, (hl) ; FE_INEXACT
190-
.result_is_exact:
191-
ld a, (iy + 6) ; get signbit
192-
ex de, hl
193-
and a, $80 ; copysign
194-
or a, b ; used for the overflow to infinite path
195-
ld e, a
196-
ret
211+
; carry wont be set
212+
.no_round_inexact:
213+
; we need to raise ERANGE if the mantissa was rounded down to zero
214+
ld a, c ; UDE
215+
or a, d
216+
or a, e
217+
jr nz, .raise_inexact
218+
; NZ needs to be set here
219+
if __ldexpf_avoid_negative_zero
220+
jr .raise_erange_avoid_negative_zero
221+
else
222+
jr .raise_erange
223+
end if
197224

198225
extern _errno
199226
extern ___fe_cur_env

test/floating_point/float32_ldexp/src/main.c

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#include <stdint.h>
44
#include <stdio.h>
55
#include <math.h>
6+
#include <fenv.h>
7+
#include <errno.h>
68
#include <assert.h>
79
#include <ti/screen.h>
810
#include <ti/getcsc.h>
@@ -16,8 +18,18 @@
1618
typedef union F32_pun {
1719
float flt;
1820
uint32_t bin;
21+
struct {
22+
uint24_t mant;
23+
uint8_t expon;
24+
};
1925
} F32_pun;
2026

27+
#if 0
28+
#define test_printf printf
29+
#else
30+
#define test_printf(...)
31+
#endif
32+
2133
size_t run_test(void) {
2234
typedef struct { F32_pun value; int expon; } input_t;
2335
typedef F32_pun output_t;
@@ -26,24 +38,44 @@ size_t run_test(void) {
2638
const input_t *input = (const input_t* )((const void*)f32_ldexp_LUT_input );
2739
const output_t *output = (const output_t*)((const void*)f32_ldexp_LUT_output);
2840
for (size_t i = 0; i < length; i++) {
41+
feclearexcept(FE_ALL_EXCEPT);
42+
errno = 0;
2943
F32_pun result;
3044
result.flt = ldexpf(input[i].value.flt, input[i].expon);
3145
if (result.bin != output[i].bin) {
3246
if (
3347
/* ignore NaN's with differing payloads */
34-
(!(isnan(result.flt) && isnan(output[i].flt))) &&
48+
(!(isnan(result.flt) && isnan(output[i].flt)))
3549
#if 1
3650
/* treat signed zeros as equal for now */
37-
(!(result.bin == 0 && iszero(output[i].flt)))
51+
&& (!((result.bin == 0) && (output[i].bin == UINT32_C(0x80000000))))
3852
#endif
3953
) {
40-
#if 0
41-
printf(
42-
"%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n",
43-
i, input[i].value.bin, input[i].expon,
44-
result.bin, output[i].bin
45-
);
46-
#endif
54+
test_printf(
55+
"%zu:\nI: %08lX %+d\nG: %08lX\nT: %08lX\n",
56+
i, input[i].value.bin, input[i].expon,
57+
result.bin, output[i].bin
58+
);
59+
return i;
60+
}
61+
}
62+
/* test exceptions */
63+
if (!(isnan(input[i].value.flt) || isnan(output[i].flt))) {
64+
int temp;
65+
F32_pun mant_0, mant_1;
66+
mant_0.flt = frexpf(fabsf(input[i].value.flt), &temp);
67+
mant_1.flt = frexpf(fabsf(output[i].flt ), &temp);
68+
bool inexact_raised = fetestexcept(FE_INEXACT);
69+
bool mant_equal = (mant_0.bin == mant_1.bin);
70+
bool became_zero = (mant_0.bin != 0 && mant_1.bin == 0);
71+
bool became_infinite = (mant_0.bin != UINT32_C(0x7F800000) && mant_1.bin == UINT32_C(0x7F800000));
72+
if (!((mant_equal != inexact_raised) && ((became_zero || became_infinite) == (errno == ERANGE)))) {
73+
test_printf(
74+
"%zu: FE: %02X errno: %d\nI: %08lX %+d\nO: %08lX\n",
75+
i, (unsigned int)__fe_cur_env, errno,
76+
input[i].value.bin, input[i].expon, output[i].bin
77+
);
78+
fputs("fenv/errno\n", stdout);
4779
return i;
4880
}
4981
}

0 commit comments

Comments
 (0)