@@ -28,7 +28,8 @@ __ldexpf_avoid_negative_zero := 1
2828 private __ldexpf_helper
2929__ldexpf_helper:
3030.maybe_subnormal:
31- or a, a
31+ ; A = zero, carry = signbit
32+ rra ; restore signbit and clear carry
3233 adc hl, bc ; BC is zero
3334.ret_self:
3435 ld hl, (iy + 3) ; mant
@@ -37,10 +38,9 @@ __ldexpf_helper:
3738; .subnormal_input:
3839 ; BC is -1 here
3940 bit 7, (iy + 11) ; scale sign
40- jr nz, .move_subnormal_down
41- ; .move_subnormal_up:
42- ld a, e ; signbit
4341 ld de, (iy + 9) ; scale
42+ jr nz, _ldexpf.move_subnormal_down
43+ ; .move_subnormal_up:
4444.norm_loop:
4545 add hl, hl
4646 jr c, .normalized
@@ -50,42 +50,16 @@ __ldexpf_helper:
5050 jr c, .norm_loop
5151; .still_subnormal:
5252 ; DE is -1 here
53- inc e ; ld e, 0
53+ ; saves 8F for this path at a cost of 3 bytes:
54+ if 0
55+ inc de ; ld e, 0
5456 jr _ldexpf.finish_subnormal
55- .normalized:
56- inc de
57- ex de, hl
58- jr _ldexpf.scale_up
59-
60- .move_subnormal_down:
61- ; BC is -1 here
62- ; first we need to test that the result won't be zero
63- call __ictlz
64- ; A is [1, 23]
65- ; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
66- sub a, 24 ; A is [-23, -1]
67- ld c, a ; sign extend A
68- ld hl, (iy + 9) ; scale
69- ld a, l
70- or a, a
71- sbc hl, bc
72- cpl
73- jr nc, _ldexpf.shru_common
74- .underflow_to_zero:
75- xor a, a
76- ld b, a ; ld b, 0
77- if __ldexpf_avoid_negative_zero
78- res 7, (iy + 6)
7957end if
80- .overflow_to_inf: ; <-- Carry is set when inf/NaN
81- ld hl, 5 ; ERANGE
82- ld (_errno), hl
83- ld l, h ; ld l, 0
58+ .normalized:
59+ inc de ; don't touch the Z flag
8460 ex de, hl
85- jr nc, _ldexpf.underflow_hijack
86- ld de, $800000
87- ld b, $7F
88- jr _ldexpf.overflow_hijack
61+ ; Z is set here
62+ jr _ldexpf.scale_up_subnormal
8963
9064;-------------------------------------------------------------------------------
9165; When the input and output are normal:
@@ -108,15 +82,21 @@ _ldexpf:
10882 inc a
10983 jr z, __ldexpf_helper.ret_self ; inf NaN
11084 ld a, e ; signbit
85+ ld de, (iy + 9) ; scale
11186 ex de, hl
112- ld hl, (iy + 9) ; scale
11387 add hl, bc ; add expon
11488 bit 7, (iy + 11) ; scale sign
11589 jr nz, .scale_down
11690.scale_up:
117- ld bc, -255 ; $FFFF01
91+ ; HL is [1, $8000FD]
92+ ld c, b ; ld bc, 0
93+ dec bc ; ld bc, -1
94+ .scale_up_subnormal: ; <-- HL is [0, $7FFFFE]
95+ inc c ; ld bc, $FFFF00
96+ inc c ; ld bc, $FFFF01 ; BC is -255 ; sets NZ
97+ ; ld bc, -255
11898 add hl, bc
119- jr c, __ldexpf_helper .overflow_to_inf
99+ jr c, .overflow_to_inf
120100 ; sbc hl, bc ; restore hl
121101 dec l ; we only care about the low 8 bits
122102 ex de, hl
@@ -131,6 +111,51 @@ _ldexpf:
131111 rr l
132112 ret
133113
114+ ;-------------------------------------------------------------------------------
115+
116+ .move_subnormal_down:
117+ ; DE = scale
118+ ; BC is -1 here
119+ ; first we need to test that the result won't be zero
120+ call __ictlz
121+ ex de, hl ; HL = scale
122+ ; A is [1, 23]
123+ ; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
124+ add a, -24 ; A is [-23, -1] and carry is cleared
125+ ld c, a ; sign extend A
126+ ld a, l
127+ sbc hl, bc
128+ cpl
129+ jr nc, _ldexpf.shru_common
130+ ; .underflow:
131+ inc b ; ld b, 0 ; sets Z
132+ .underflow_to_zero: ; <-- Z is set when underflowing to zero
133+ .overflow_to_inf: ; <-- NZ is set when infinite
134+ .raise_erange_avoid_negative_zero:
135+ ld hl, $800000
136+ jr nz, .overflow
137+ add hl, hl ; ld hl, 0
138+ if __ldexpf_avoid_negative_zero
139+ ; prevents negative zero from being emitted on underflow
140+ res 7, (iy + 6)
141+ end if
142+ .overflow:
143+ ex de, hl
144+ .raise_erange:
145+ ld hl, 5 ; ERANGE
146+ ld (_errno), hl
147+ .raise_inexact:
148+ ld hl, ___fe_cur_env
149+ set 5, (hl) ; FE_INEXACT
150+ .result_is_exact:
151+ ld a, (iy + 6) ; expon
152+ rla ; extract signbit
153+ ex de, hl
154+ ; B is $FF if infinite and $00 otherwise
155+ rr b
156+ ld e, b
157+ ret
158+
134159;-------------------------------------------------------------------------------
135160.scale_down:
136161 push de ; mant <<= 1
@@ -141,13 +166,13 @@ _ldexpf:
141166 jr nc, .finish ; expon > 0
142167;-------------------------------------------------------------------------------
143168.shru_to_subnormal:
169+ ; Z is set here
144170 xor a, a
145- sub a, e
146- pop de
147171 ld c, 48 ; ld bc, 24 << 1
148172 add hl, bc
149- jr nc, __ldexpf_helper.underflow_to_zero
150-
173+ pop hl ; reset SP
174+ jr nc, .underflow_to_zero
175+ sub a, e
151176 set 7, (iy + 5) ; set implicit mantissa bit
152177.shru_common:
153178 ; A should be [0, 23]
@@ -157,43 +182,45 @@ _ldexpf:
157182 xor a, a
158183 inc b
159184 ; shift amount will be [1, 24]
160- ld c , a ; ld c , 0
161- ld d , (iy - 1)
185+ ld d , a ; ld d , 0
186+ ld c , (iy - 1)
162187.shru_loop:
163- adc a, c ; collect sticky bits
164- srl d
188+ adc a, d ; collect sticky bits
189+ srl c
165190 rr h
166191 rr l
167192 djnz .shru_loop
168- ld (iy - 1), d
193+ ld (iy - 1), c
169194 pop de
170195 ld d, h
171196 ld e, l
172197
173198 ; round upwards to even if (round && (guard || sticky))
174199 jr nc, .no_round
175- ; be careful not to touch the carry flag
176- inc a
177- dec a
200+ ; we must ensure that FE_INEXACT is raised since rounding has occured
201+ or a, a
178202 jr nz, .round_up
179- bit 0, e ; test guard bit
180- jr z, .no_round
203+ inc a ; ld a, 1
204+ and a, e ; test guard bit
205+ jr z, .no_round_inexact
181206.round_up:
182207 inc de ; round upwards to even (wont overflow)
183208.no_round:
184- adc a, a
209+ adc a, a ; test the sticky and round bits
185210 jr z, .result_is_exact
186- .underflow_hijack:
187- .overflow_hijack:
188- ld hl, ___fe_cur_env
189- set 5, (hl) ; FE_INEXACT
190- .result_is_exact:
191- ld a, (iy + 6) ; get signbit
192- ex de, hl
193- and a, $80 ; copysign
194- or a, b ; used for the overflow to infinite path
195- ld e, a
196- ret
211+ ; carry wont be set
212+ .no_round_inexact:
213+ ; we need to raise ERANGE if the mantissa was rounded down to zero
214+ ld a, c ; UDE
215+ or a, d
216+ or a, e
217+ jr nz, .raise_inexact
218+ ; NZ needs to be set here
219+ if __ldexpf_avoid_negative_zero
220+ jr .raise_erange_avoid_negative_zero
221+ else
222+ jr .raise_erange
223+ end if
197224
198225 extern _errno
199226 extern ___fe_cur_env
0 commit comments