@@ -16,202 +16,187 @@ _scalbn := _ldexpf
1616
1717else
1818
19- if 1
19+ ; (set to 0 or 1) avoid returning negative zero on underflow with Ti's floats
20+ __ldexpf_avoid_negative_zero := 1
2021
21- ; NOTE: since Ti floats are used, negative zero will not be returned unless the
22- ; input was negative zero.
23- ;
24- ; normal inputs are handled correctly, unless the output is subnormal
25- ; subnormal inputs/outputs return zero and set ERANGE and FE_INEXACT
26- ; zero/infinite/NaN inputs are handled correctly
27- _ldexpf:
28- _ldexp:
29- _scalbnf:
22+ ; ldexpf behaviour:
23+ ; - signed zero, infinity, and NaN inputs are returned unmodified
24+ ; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
25+ ; - FE_INEXACT is set if rounding occured
26+ ;-------------------------------------------------------------------------------
27+
28+ private __ldexpf_helper
29+ __ldexpf_helper:
30+ .maybe_subnormal:
31+ or a, a
32+ adc hl, bc ; BC is zero
33+ .ret_self:
34+ ld hl, (iy + 3) ; mant
35+ ret z ; return zero/inf/NaN
36+ dec bc ; BC is now -1
37+ ; .subnormal_input:
38+ ; BC is -1 here
39+ bit 7, (iy + 11) ; scale sign
40+ jr nz, .move_subnormal_down
41+ ; .move_subnormal_up:
42+ ld a, e ; signbit
43+ ld de, (iy + 9) ; scale
44+ .norm_loop:
45+ add hl, hl
46+ jr c, .normalized
47+ ex de, hl
48+ add hl, bc ; --scale
49+ ex de, hl
50+ jr c, .norm_loop
51+ ; .still_subnormal:
52+ ; DE is -1 here
53+ inc e ; ld e, 0
54+ jr _ldexpf.finish_subnormal
55+ .normalized:
56+ inc de
57+ ex de, hl
58+ jr _ldexpf.scale_up
59+
60+ .move_subnormal_down:
61+ ; BC is -1 here
62+ ; first we need to test that the result won't be zero
63+ call __ictlz
64+ ; A is [1, 23]
65+ ; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
66+ sub a, 24 ; A is [-23, -1]
67+ ld c, a ; sign extend A
68+ ld hl, (iy + 9) ; scale
69+ ld a, l
70+ or a, a
71+ sbc hl, bc
72+ cpl
73+ jr nc, _ldexpf.shru_common
74+ .underflow_to_zero:
75+ xor a, a
76+ ld b, a ; ld b, 0
77+ if __ldexpf_avoid_negative_zero
78+ res 7, (iy + 6)
79+ end if
80+ .overflow_to_inf: ; <-- Carry is set when inf/NaN
81+ ld hl, 5 ; ERANGE
82+ ld (_errno), hl
83+ ld l, h ; ld l, 0
84+ ex de, hl
85+ jr nc, _ldexpf.underflow_hijack
86+ ld de, $800000
87+ ld b, $7F
88+ jr _ldexpf.overflow_hijack
89+
90+ ;-------------------------------------------------------------------------------
91+ ; When the input and output are normal:
92+ ; scaling up : 60F + 12R + 4W + 2
93+ ; scaling down: 60F + 12R + 4W + 4
3094_scalbn:
95+ _scalbnf:
96+ _ldexp:
97+ _ldexpf:
3198 ld iy, 0
3299 lea bc, iy + 0
33100 add iy, sp
34101 ld hl, (iy + 3) ; mant
35102 add hl, hl
36103 ld a, (iy + 6) ; expon
104+ ld e, a ; signbit
37105 adc a, a
38- jr z, .maybe_subnormal
106+ jr z, __ldexpf_helper.maybe_subnormal
107+ ld c, a
39108 inc a
40- jr z, .ret_self ; inf NaN
41- dec a
109+ jr z, __ldexpf_helper .ret_self ; inf NaN
110+ ld a, e ; signbit
42111 ex de, hl
43112 ld hl, (iy + 9) ; scale
44- ld c, a
45113 add hl, bc ; add expon
46- ld a, l
47114 bit 7, (iy + 11) ; scale sign
48- jr z, .scale_up
49- .scale_down:
50- ; HL is not INT_MIN here
51- dec hl
52- add hl, hl
53- jr nc, .finish ; expon > 0
54- ; expon <= 0 or subnormal
55- .underflow_to_zero:
56- ld hl, ___fe_cur_env
57- set 5, (hl) ; FE_INEXACT
58- if 0
59- ld a, (iy + 6)
60- and a, $80 ; copysign
61- else
62- xor a, a ; avoid returning negative zero with Ti's floats
63- end if
64- sbc hl, hl
65- .common_erange:
66- ld bc, 5 ; ERANGE
67- ld (_errno), bc
68- ld e, a
69- ret
70- .overflow_to_inf:
71- ld hl, $800000
72- ld a, (iy + 6)
73- or a, $7F ; copysign
74- jr .common_erange
75-
115+ jr nz, .scale_down
76116.scale_up:
77- ld bc, -255
117+ ld bc, -255 ; $FFFF01
78118 add hl, bc
79- jr c, .overflow_to_inf
80- .finish:
81- ld l, a
119+ jr c, __ldexpf_helper .overflow_to_inf
120+ ; sbc hl, bc ; restore hl
121+ dec l ; we only care about the low 8 bits
82122 ex de, hl
83- ; signbit(A) E:UHL >>= 1
84- ld a, (iy + 6) ; expon
123+ .finish_subnormal:
85124 push hl
86- rla
125+ .finish:
126+ rla ; extract signbit
87127 rr e
88128 rr (iy - 1)
89129 pop hl
90130 rr h
91131 rr l
92132 ret
93133
94- .maybe_subnormal:
95- dec bc ; BC is now -1
96- add hl, bc
97- jr c, .underflow_to_zero
98- ; return zero
99- .ret_self:
100- ld hl, (iy + 3)
101- ld e, (iy + 6)
102- ret
103-
104- else
105-
106- ; normal inputs are handled correctly, unless the output is subnormal
107- ; subnormal inputs are handled correctly for positive scaling values
108- ; subnormal outputs return zero and set ERANGE and FE_INEXACT for negative scaling values
109- ; zero/infinite/NaN inputs are handled correctly
110- _ldexpf:
111- _ldexp:
112- _scalbnf:
113- _scalbn:
114- ld iy, 0
115- lea bc, iy + 0
116- add iy, sp
117- ld hl, (iy + 3) ; mant
118- add hl, hl
119- ld e, (iy + 6) ; expon
120- ld a, e
121- rl e
122- jr z, .maybe_subnormal
123- inc e
124- jr z, .ret_self ; inf NaN
125- dec e
126- ld c, e
127- ex de, hl
128- ld hl, (iy + 9) ; scale
129- add hl, bc ; add expon
130- ld a, l
131- bit 7, (iy + 11) ; scale sign
132- jr z, .scale_up
134+ ;-------------------------------------------------------------------------------
133135.scale_down:
134- ; test signbit
135- push hl
136+ push de ; mant <<= 1
137+ ld e, l ; shift amount
136138 ; HL is not INT_MIN here
137139 dec hl
138140 add hl, hl
139- pop hl
140141 jr nc, .finish ; expon > 0
141- ; expon <= 0 or subnormal
142- ; jr .underflow_to_zero
143- .underflow_to_zero:
142+ ;-------------------------------------------------------------------------------
143+ .shru_to_subnormal:
144+ xor a, a
145+ sub a, e
146+ pop de
147+ ld c, 48 ; ld bc, 24 << 1
148+ add hl, bc
149+ jr nc, __ldexpf_helper.underflow_to_zero
150+
151+ set 7, (iy + 5) ; set implicit mantissa bit
152+ .shru_common:
153+ ; A should be [0, 23]
154+ ld b, a
155+ ld hl, (iy + 3) ; mantissa
156+ push hl ; ld (iy - 3), hl
157+ xor a, a
158+ inc b
159+ ; shift amount will be [1, 24]
160+ ld c, a ; ld c, 0
161+ ld d, (iy - 1)
162+ .shru_loop:
163+ adc a, c ; collect sticky bits
164+ srl d
165+ rr h
166+ rr l
167+ djnz .shru_loop
168+ ld (iy - 1), d
169+ pop de
170+ ld d, h
171+ ld e, l
172+
173+ ; round upwards to even if (round && (guard || sticky))
174+ jr nc, .no_round
175+ ; be careful not to touch the carry flag
176+ inc a
177+ dec a
178+ jr nz, .round_up
179+ bit 0, e ; test guard bit
180+ jr z, .no_round
181+ .round_up:
182+ inc de ; round upwards to even (wont overflow)
183+ .no_round:
184+ adc a, a
185+ jr z, .result_is_exact
186+ .underflow_hijack:
187+ .overflow_hijack:
144188 ld hl, ___fe_cur_env
145189 set 5, (hl) ; FE_INEXACT
146- ld a, (iy + 6)
190+ .result_is_exact:
191+ ld a, (iy + 6) ; get signbit
192+ ex de, hl
147193 and a, $80 ; copysign
148- sbc hl, hl
149- .common_erange:
150- ld bc, 5 ; ERANGE
151- ld (_errno), bc
194+ or a, b ; used for the overflow to infinite path
152195 ld e, a
153196 ret
154- .overflow_to_inf:
155- ld hl, $800000
156- ld a, (iy + 6)
157- or a, $7F ; copysign
158- jr .common_erange
159197
160- .scale_up:
161- ld bc, -255
162- add hl, bc
163- jr c, .overflow_to_inf
164- .finish:
165- ld l, a
166- ex de, hl
167- .finish_subnormal:
168- ; signbit(A) E:UHL >>= 1
169- ld a, (iy + 6) ; expon
170- push hl
171- rla
172- rr e
173- rr (iy - 1)
174- pop hl
175- rr h
176- rr l
177- ret
178-
179- .maybe_subnormal:
180- dec bc ; BC is now -1
181- add hl, bc
182- ; jr c, .underflow_to_zero
183- jr c, .subnormal
184- ; return zero
185- .ret_self:
186- ld hl, (iy + 3)
187- ld e, (iy + 6)
188- ret
189-
190- .subnormal:
191- ; BC is -1 here
192- bit 7, (iy + 11) ; scale sign
193- jr nz, .underflow_to_zero
194- ld de, (iy + 9) ; scale
195- ld hl, (iy + 3) ; mant
196- .norm_loop:
197- add hl, hl
198- jr c, .normalized
199- ex de, hl
200- add hl, bc ; --scale
201- ex de, hl
202- jr c, .norm_loop
203- ; .still_subnormal:
204- ld e, 0
205- jr .finish_subnormal
206- .normalized:
207- inc de
208- ex de, hl
209- ld a, l
210- jr .scale_up
211-
212- end if
213-
214- extern ___fe_cur_env
215198 extern _errno
199+ extern ___fe_cur_env
200+ extern __ictlz
216201
217202end if
0 commit comments