11 assume adl=1
22
3+ ;-------------------------------------------------------------------------------
4+
5+ section .text
6+
; __ulltod: entry point for converting an unsigned 64-bit integer to an f64.
; The value is taken in BC:UDE:UHL (same register layout as the abs(BC:UDE:UHL)
; note on __lltod). No negation is needed, so this stub only records "positive"
; and hands over to the shared conversion code.
7+ public __ulltod
8+ ; (long double)unsigned long long
9+ __ulltod:
; Comparing A with itself always sets Z; the value of A is irrelevant.
10+ cp a, a ; set Z flag
; The saved flags are popped at __int_to_f64_shl.finish, where Z means
; "return without setting the sign bit".
11+ push af
12+ jq __lltod_common
13+
14+ ;-------------------------------------------------------------------------------
15+
16+ section .text
17+
; __lltod: entry point for converting a signed 64-bit integer in BC:UDE:UHL
; to an f64. The sign is recorded on the stack, the value is replaced by its
; magnitude, and the shared conversion code finishes the job.
18+ public __lltod
19+ ; (long double)long long
20+ __lltod:
; Bit 7 of B is the sign bit of the 64-bit value: Z = non-negative, NZ = negative.
21+ bit 7, b
; Saved flags are consumed by `pop af` / `ret z` at __int_to_f64_shl.finish.
22+ push af
23+ call nz, __llneg ; abs(BC:UDE:UHL)
24+
; No jump follows: execution continues into __lltod_common.
; NOTE(review): presumably `require` is what keeps that section placed directly
; after this one - confirm against the linker conventions.
25+ require __lltod_common
26+
27+ ;-------------------------------------------------------------------------------
28+
29+ section .text
30+
; __lltod_common: shared conversion tail for __lltod and __ulltod.
; In:   BC:UDE:UHL = 64-bit magnitude to convert
;       (SP)       = AF pushed by the entry point; Z set there means "positive"
; Out:  leaves through __int_to_f64_zero_or_one, __int_to_f64_shl or
;       __int_to_f64_shl.finish; the saved AF is popped at .finish.
; Values 0 and 1 and values below 2^53 are dispatched to the other routines.
; This routine itself handles 2^53 and above: the value is shifted right by
; 1..11 bits and rounded to nearest, ties to even.
	private	__lltod_common
__lltod_common:
	call	__llctlz
	sub	a, 63			; normalize clz_result
	; filter out exponent of $000 (zero) and $3FF (one)
	jr	nc, __int_to_f64_zero_or_one
	; A is [-63, -1]
	add	a, 52
	; A is [-11, 51]
	jr	c, __int_to_f64_shl	; carry out of the add <=> A is [0, 51]
; __int_to_f64_shr:
	; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
	; Minimum exponent: $434 (2^53)
	; Maximum exponent: $43E (2^63)
	; It is assumed that A is [-11, -1] here, or [-63, -53] before adding 52
	cpl
	inc	a			; A = -A; neither instruction touches carry
	; A is [1, 11] = number of bits to shift right; carry is still clear
	push	hl			; H and L are scratch for the rounding analysis
	push	bc			; so are B (loop counter) and C (sticky counter)
	ld	b, a
	ld	c, 1			; biased by one so that `dec c` gives Z when no sticky bit was seen
.shift_loop:
	; carry = bit shifted out by the previous iteration (clear on the first pass)
	jr	nc, .no_carry
	inc	c			; a 1 below the round bit makes the result inexact
.no_carry:
	srl	h
	rr	l			; carry = bit shifted out of L
	djnz	.shift_loop		; djnz leaves the flags alone
	; carry = round bit (the last bit shifted out), B = 0
	; test round bit
	jr	nc, .no_round		; round bit clear: below half, truncate
	; test sticky bits
	dec	c			; dec leaves carry alone
	jr	nz, .round_up		; above half: always round up
	; exact tie: round to even, i.e. round up only if the surviving LSB is 1
	bit	0, l
	; BIT reports the tested bit in Z and does not modify carry (which is
	; known to be set here), so the decision has to be taken on Z.
	jr	z, .no_round
.round_up:
	inc	b			; round up after shifting
.no_round:
	; B is 1 when rounding up and 0 otherwise
	ld	h, b
	pop	bc

	ld	l, a
	ex	(sp), hl		; (SP) = shift
	call	__llshru
	ex	(sp), hl		; (SP) = shifted HL, H = rounding, L = shift
	; NOTE(review): A is used as the shift count again here, which assumes
	; __llshru preserves A - confirm against its implementation.
	add	a, 51			; A = base2_logarithm - 1

	dec	h			; Z <=> rounding flag was 1
	push	af			; keep that Z across the exponent packing below
	; exponent = ($400 + (base2_logarithm - 1)) << 4
	; BC = $4EEM
	ld	l, a
	ld	h, $04
	; clear the implicit mantissa bit
	res	4, c			; 52 % 8 == 4
	add	hl, hl
	add	hl, hl
	add	hl, hl
	add	hl, hl
	ld	a, l
	or	a, c			; merge the top mantissa nibble under the exponent
	ld	c, a
	ld	b, h
	pop	af			; Z <=> round up
	pop	hl			; restore shifted HL
	; Incrementing the packed value lets a mantissa overflow carry into the exponent.
	call	z, __lladd_1		; round up to even
	jr	__int_to_f64_shl.finish
100+
101+ ;-------------------------------------------------------------------------------
102+
103+ section .text
104+
; __int_to_f64_zero_or_one: build the f64 bit pattern for an input of 0 or 1.
; Reached by `jr nc` straight after the `sub a, 63` / `sub a, 31` that
; normalizes the leading-zero count, so on entry:
;   carry is clear,
;   Z  set  -> the input was 1 (result gets exponent $3FF, mantissa 0),
;   Z clear -> the input was 0 (result is all zero bits).
; None of the loads, `dec hl` or `ex de, hl` below modify those flags.
; Leaves through __int_to_f64_shl.finish, which applies the saved sign.
105+ private __int_to_f64_zero_or_one
106+ __int_to_f64_zero_or_one:
107+ ; carry is cleared here
108+ ; UHL is either one or zero
; H is zero for both possible inputs, so this clears B and C.
109+ ld b, h
110+ ld c, h
111+ jr nz, .ret_zero
; Input was 1: exponent $3FF placed above the four mantissa bits held in C.
112+ ld bc, $3FF0
113+ dec hl ; ld hl, 0
114+ .ret_zero:
; UHL is zero here. Swap it into UDE, then `sbc hl, hl` with carry clear
; zeroes UHL again, so both UDE and UHL end up zero whatever UDE held before.
115+ ex de, hl
116+ sbc hl, hl
117+ jr __int_to_f64_shl.finish
118+
119+ ;-------------------------------------------------------------------------------
120+
3121 section .text
4122
; __itod: sign-extend the signed int in UHL to E:UHL, then continue in __ltod.
; NOTE(review): the rows below whose gutter ends in `-` look like the superseded
; side of a diff rendering (they still target __ltod.hijack). The comments added
; here describe only the remaining rows.
5123 public __itod
6124; (long double)int
7125__itod:
8126 push hl
; Doubling UHL shifts its sign bit out into carry.
9127 add hl, hl ; extract signbit
; `sbc hl, hl` then yields 0 (carry clear) or -1 (carry set), so L holds the
; byte needed to extend the sign into E.
128+ sbc hl, hl ; set Z flag
129+ ld e, l ; sign extend UHL to E:UHL
; Restore the original UHL saved above.
10130 pop hl
11- push af
12- ld e, 0
13- call c, __ineg ; abs(UHL)
14- jr __ltod.hijack
; __ltod starts by testing bit 7 of E, so the sign is re-derived there.
131+ jq __ltod
132+
133+ ;-------------------------------------------------------------------------------
15134
16135 section .text
17136
@@ -22,102 +141,104 @@ __utod:
22141
23142 require __ultod
24143
144+ ;-------------------------------------------------------------------------------
145+
25146 section .text
26147
; __ultod: entry point for converting an unsigned long to an f64. The value
; uses the same E:UHL layout as the abs(E:UHL) note on __ltod.
; NOTE(review): the rows below whose gutter ends in `-` look like the superseded
; side of a diff rendering. The comments added here describe only the other rows.
27148 public __ultod
28149; (long double)unsigned long
29150__ultod:
30- or a, a
31- push af
32- jr __ltod.hijack
; Comparing A with itself always sets Z; the value of A is irrelevant.
151+ cp a, a ; set Z flag
; The saved flags are popped at __int_to_f64_shl.finish, where Z means
; "return without setting the sign bit".
152+ push af
153+ jq __ltod_common
154+
155+ ;-------------------------------------------------------------------------------
33156
34157 section .text
35158
; __ltod: entry point for converting a signed long in E:UHL to an f64.
; The sign is recorded on the stack, the value is replaced by its magnitude,
; and __ltod_common performs the conversion.
; NOTE(review): the rows below whose gutter ends in `-` look like the superseded
; side of a diff rendering. The comments added here describe only the other rows.
36159 public __ltod
37160; (long double)long
38161__ltod:
39- rlc e
; Bit 7 of E is the sign bit of E:UHL: Z = non-negative, NZ = negative.
162+ bit 7, e
163+
164+ require __ltod.hijack_itod
165+
166+ ;-------------------------------------------------------------------------------
167+
168+ section .text
169+
; Expects the flags to describe the sign already: Z = non-negative.
; NOTE(review): in this listing __itod enters through __ltod, not through this
; label, so no visible code jumps here directly.
170+ private __ltod.hijack_itod
171+ __ltod.hijack_itod:
172+
; Saved flags are consumed by `pop af` / `ret z` at __int_to_f64_shl.finish.
40173 push af
41- rrc e
42- call c, __lneg ; abs(E:UHL)
174+ call nz, __lneg ; abs(E:UHL)
43175
44- require __ltod.hijack
; No jump follows: execution continues into __ltod_common.
176+ require __ltod_common
177+
178+ ;-------------------------------------------------------------------------------
45179
46180 section .text
47181
48- private __ltod.hijack
49- __ltod.hijack :
; __ltod_common: shared tail of __ltod and __ultod for a 32-bit magnitude in
; E:UHL. It turns the leading-zero count into a left-shift amount for
; __int_to_f64_shl, or diverts to __int_to_f64_zero_or_one for inputs 0 and 1.
; The AF pushed by the entry point stays on the stack for the .finish tail.
; NOTE(review): the rows below whose gutter ends in `-` look like the superseded
; side of a diff rendering. The comments added here describe only the other rows.
182+ private __ltod_common
183+ __ltod_common :
50184 call __lctlz
51- inc.s bc ; clear UBC
52- ld b, a ; <<= 8
53- xor a, $20 ; turns 32 into zero and clears carry flag
54- jr z, .zero
55- ; clears the MSB since the float will be normalized
56- ; x <<= clz_result + 1; /* shift by 32 is UB */
57- if 0
58- ; calculate the exponent
59- push hl
60- ; 1023 + 31 = 1054 = 0x41E
61- ld hl, $041E00
62- ld c, l ; ld c, 0
63- sbc hl, bc
64- ld l, e ; (expon16 << (16 + 24)) | (mant48)
65- ex de, hl
66- pop hl
; After this subtraction: no carry means the count was 31 or 32 (input 1 or 0),
; with Z telling the two apart; carry means a larger input.
185+ sub a, 31 ; normalize clz_result
67186
68- ; ld b, a
69- inc b
70- ld a, e
71- .loop32: ; shift by 32 is not UB here!
72- add hl, hl
73- rla
74- djnz .loop32
75- ld e, a
76- else
77- ; calculate the exponent
78- push hl
79- ; 1023 + 31 = 1054 = 0x41E
80- ld hl, $041E00
81- ld c, l ; ld c, 0
82- sbc hl, bc
83- ld l, e ; (expon16 << (16 + 24)) | (mant48)
84- ex de, hl
187+ ; filter out exponent of $000 (zero) and $3FF (one)
188+ jr nc, __int_to_f64_zero_or_one
189+ ; A is [-31, -1]
; Adding 52 gives the left-shift count that moves the leading 1 up to bit 52.
190+ add a, 52
191+ ; A is [21, 51]
85192
86- ld l, b
87- pop bc
88- ld a, e
89- call __lshl
90- push bc
91- pop hl
92- ; shift by 32 is UB
93- add hl, hl
94- rla
95- ld e, a
96- end if
; A 32-bit input always lands in the left-shift range, so no branch is needed:
; execution continues into __int_to_f64_shl.
193+ require __int_to_f64_shl
97194
98- ; UDE:D has expon, E:UHL has mant
99- ; Float64_mant_bits - uint48_bits = 4
100- ld c, 16 + 4
101- push bc
195+ ;-------------------------------------------------------------------------------
196+
197+ section .text
198+
; __int_to_f64_shl: pack a magnitude below 2^53 into f64 bits. The value is
; shifted left by A so its leading 1 lands on bit 52, then the exponent is
; written over that implicit bit.
; In:   A = left-shift count, (SP) = AF saved by the entry point (Z = positive)
; Out:  BC:UDE:UHL = f64 bit pattern, with the sign bit set at .finish when the
;       saved Z flag is clear.
; .finish is also the common exit for __lltod_common and __int_to_f64_zero_or_one.
; NOTE(review): the rows below whose gutter ends in `-` look like the superseded
; side of a diff rendering. The comments added here describe only the other rows.
199+ private __int_to_f64_shl
200+ __int_to_f64_shl:
201+ ; exponent = (1023 or $3FF or f64_bias) + base2_logarithm
202+ ; Minimum exponent: $400 (2^1)
203+ ; Maximum exponent: $433 (2^52), i.e. $3FF + 52
204+ ; It is assumed that A is [0, 51] here, or [-52, -1] before adding 52
; The shift count travels on the stack; the two `ex (sp), hl` swap it with HL.
205+ push hl
206+ ld l, a
207+ ex (sp), hl ; (SP) = shift
102208 call __llshl
103- pop af ; reset SP
209+ ex (sp), hl ; (SP) = shifted HL, L = shift
210+
; shift = 52 - base2_logarithm, so 51 - shift = base2_logarithm - 1.
211+ ld a, 51
212+ sub a, l
213+
214+ ; exponent = ($400 + (base2_logarithm - 1)) << 4
215+ ; BC = $4EEM
216+ ld l, a
217+ ld h, $04
218+ ; clear the implicit mantissa bit
219+ res 4, c ; 52 % 8 == 4
; Four doublings shift the exponent left by 4, leaving the low nibble of L free
; for the top mantissa bits held in C.
220+ add hl, hl
221+ add hl, hl
222+ add hl, hl
223+ add hl, hl
224+ ld a, l
225+ or a, c
226+ ld c, a
227+ ld b, h
228+ pop hl ; restore shifted HL
104229.finish:
; Pops the AF pushed by the public entry point: Z = result is positive.
105230 pop af
106- ret nc ; positive
231+ ret z
; Bit 7 of B is the sign bit of the packed result.
107232 set 7, b
108- ret ; negative
109-
110- .zero:
111- ; E:UHL and A are zero
112- ex de, hl
113- sbc hl, hl
114- ld b, e
115- ld c, e
116- pop af
117233 ret
235+ ;-------------------------------------------------------------------------------
236+
119237 extern __ineg
120238 extern __lneg
121239 extern __lctlz
122- extern __lshl
240+ extern __llctlz
123241 extern __llshl
242+ extern __llshru
243+ extern __llneg
244+ extern __lladd_1
0 commit comments