Skip to content

Commit 2ab0333

Browse files
committed
rationalize pico_float/pico_double libraries
* on RP2350 the _dcp variant now enables -msoft-float, since if you're using this variant at all it is likely because you don't want to use the VFP unit (to save stack space)
* implement all float_ and double_ conversion functions in all pico_float_pico_ variants and in pico_double_pico on RP2040 and RP2350 (many were missing in some combinations)
* provide better granularity of which functions are wrapped in each case
1 parent c54475d commit 2ab0333

File tree

15 files changed

+1790
-127
lines changed

15 files changed

+1790
-127
lines changed

src/rp2_common/pico_double/double_aeabi_dcp.S

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,53 +128,131 @@ saving_func wrapper sqrt
128128
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
129129
saving_func_return
130130

131-
// todo not a real thing
132-
double_wrapper_section __aeabi_dclassify
133-
saving_func wrapper __aeabi_dclassify
131+
double_section dclassify
132+
saving_func regular dclassify
134133
@ with correct rounding
135134
dcp_dclassify_m apsr_nzcv,r0,r1
136135
saving_func_return
137136

138137
// ============== CONVERSION FUNCTIONS ===============
139138

140139
double_wrapper_section __aeabi_d2f
140+
regular_func double2float
141141
saving_func wrapper __aeabi_d2f
142142
@ with rounding
143143
dcp_double2float_m r0,r0,r1
144144
saving_func_return
145145

146146
double_wrapper_section __aeabi_i2d
147+
regular_func int2double
147148
saving_func wrapper __aeabi_i2d
148149
dcp_int2double_m r0,r1,r0
149150
saving_func_return
150151

151152
double_wrapper_section __aeabi_ui2d
153+
regular_func uint2double
152154
saving_func wrapper __aeabi_ui2d
153155
dcp_uint2double_m r0,r1,r0
154156
saving_func_return
155157

158+
double_section double2fix_z
159+
saving_func regular double2fix_z
160+
ubfx r3, r1, #20, #11
161+
adds r3, r2
162+
beq 1f // very small; we don't care that we might make a denormal
163+
asrs ip, r3, #11
164+
beq 1f
165+
ite pl
166+
movpl r3, #0x7ff
167+
movsmi r3, #0
168+
1:
169+
bfi r1, r3, #20, #11
170+
b double2int_z_entry
171+
172+
double_section double2ufix
173+
regular_func double2ufix
174+
saving_func regular double2ufix_z
175+
double2ufix_z_entry:
176+
ubfx r3, r1, #20, #11
177+
adds r3, r2
178+
beq 1f // very small; we don't care that we might make a denormal
179+
asrs ip, r3, #11
180+
beq 1f
181+
ite pl
182+
lsrspl r3, r1, #20 // 0x7ff
183+
movsmi r3, #0
184+
1:
185+
bfi r1, r3, #20, #11
186+
b double2uint_z_entry
187+
188+
double_section double2fix
189+
saving_func regular double2fix
190+
ubfx r3, r1, #20, #11
191+
adds r3, r2
192+
beq 1f // very small; we don't care that we might make a denormal
193+
asrs ip, r3, #11
194+
beq 1f
195+
ite pl
196+
movpl r3, #0x7ff
197+
movsmi r3, #0
198+
1:
199+
bfi r1, r3, #20, #11
200+
b double2int_entry
201+
202+
double_section double2int
203+
saving_func regular double2int
204+
double2int_entry:
205+
lsls r2, r1, #1
206+
// r2 = high word of abs(x) << 1 (sign bit shifted out into carry); low word is still in r0
207+
// abs(zero)     => r2 = 0x00000000
208+
// abs(denormal) => r2 < 0x00200000 (exponent field is zero)
209+
// abs(1.0)      => r2 = 0x7fe00000; abs(inf/nan) => r2 >= 0xffe00000
210+
bcc double2int_z_entry // positive is ok for int_z
211+
orrs r3, r2, r0
212+
beq double2int_z_entry // 0 or -0 is ok for int_z
213+
// r3 = last 3 bits of 23 significant bits of mantissa at position 32-23
214+
lsrs r3, r0, #32 - 3
215+
lsls r3, #9
216+
217+
lsrs r2, #21
218+
adds r2, #1
219+
subs r2, r2, #0x400
220+
bcc 1f // <1 means subtract 1
221+
// recreate the 23 significant bits of mantissa for float at the top of r3
222+
adds r3, r3, r1, lsl #12
223+
lsls r3, r2
224+
beq double2int_z_entry // integer
225+
1:
226+
dcp_double2int_m r0,r0,r1
227+
subs r0, #1
228+
saving_func_return
229+
156230
double_wrapper_section __aeabi_d2iz
231+
regular_func double2int_z
157232
saving_func wrapper __aeabi_d2iz
233+
double2int_z_entry:
158234
@ with truncation towards 0
159235
dcp_double2int_m r0,r0,r1
236+
// note: this works with either saved or not saved call as it is just a `bx lr`
160237
saving_func_return
161238

162239
double_wrapper_section __aeabi_d2uiz
240+
regular_func double2uint_z
241+
regular_func double2uint
163242
saving_func wrapper __aeabi_d2uiz
243+
double2uint_z_entry:
164244
@ with truncation towards 0
165245
dcp_double2uint_m r0,r0,r1
166246
saving_func_return
167247

168-
// todo not a real thing
169-
double_wrapper_section __aeabi_d2i_r
170-
saving_func wrapper __aeabi_d2i_r
248+
double_section double2int_r
249+
saving_func regular double2int_r
171250
@ with rounding
172251
dcp_double2int_r_m r0,r0,r1
173252
saving_func_return
174253

175-
// todo not a real thing
176-
double_wrapper_section __aeabi_d2ui_r
177-
saving_func wrapper __aeabi_d2ui_r
254+
double_section double2uint_r
255+
saving_func regular double2uint_r
178256
@ with rounding
179257
dcp_double2uint_r_m r0,r0,r1
180258
saving_func_return

src/rp2_common/pico_double/double_aeabi_rp2040.S

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,13 +425,15 @@ double_wrapper_section __aeabi_ui2d
425425
double_wrapper_section __aeabi_i2d
426426

427427
wrapper_func __aeabi_ui2d
428+
regular_func uint2double
428429
movs r1, #0
429430
cmp r0, #0
430431
bne 2f
431432
1:
432433
bx lr
433434
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
434435
wrapper_func __aeabi_i2d
436+
regular_func int2double
435437
asrs r1, r0, #31
436438
eors r0, r1
437439
subs r0, r1
@@ -506,6 +508,7 @@ regular_func double2int
506508
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
507509
double_wrapper_section __aeabi_d2uiz
508510
wrapper_func __aeabi_d2uiz
511+
regular_func double2uint_z
509512
regular_func double2uint
510513
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
511514

@@ -528,11 +531,13 @@ regular_func ufix642double
528531
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
529532
double_wrapper_section __aeabi_l2d
530533
wrapper_func __aeabi_l2d
534+
regular_func int642double
531535
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
532536

533537
// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
534538
double_wrapper_section __aeabi_ul2d
535539
wrapper_func __aeabi_ul2d
540+
regular_func uint642double
536541
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
537542

538543
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
@@ -566,14 +571,55 @@ regular_func double2int64
566571
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
567572
double_wrapper_section __aeabi_d2ulz
568573
wrapper_func __aeabi_d2ulz
574+
regular_func double2uint64
575+
regular_func double2uint64_z
569576
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
570577

578+
double_section double2fix64_z
579+
regular_func double2fix64_z
580+
lsls r3, r1, #1
581+
bcc double2fix64 // positive is ok for fix64
582+
push {r1, r2}
583+
lsrs r3, #21
584+
585+
adds r3, r2
586+
movs r2, #1
587+
negs r2, r2
588+
lsrs r2, #22
589+
subs r3, r2
590+
591+
bcc 1f // <1 means subtract 1
592+
cmp r3, #52
593+
bge double2fix64 // must be an integer
594+
595+
lsls r2, r1, #12
596+
lsrs r1, r0, #20
597+
adds r2, r1
598+
// r2 now has highest 32 mantissa bits
599+
lsls r2, r3
600+
bne 1f // not integer as non zero fractional bits remain
601+
lsls r2, r0, #12
602+
lsls r2, r3
603+
bne 1f
604+
// integer
605+
pop {r1, r2}
606+
b double2fix64
607+
1:
608+
pop {r1, r2}
609+
push {lr}
610+
bl double2fix64
611+
movs r2, #0
612+
adds r0, #1
613+
adcs r1, r2
614+
pop {pc}
615+
571616
double_section double2fix64
572617
regular_func double2fix64
573618
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
574619

575620
double_section double2ufix64
576621
regular_func double2ufix64
622+
regular_func double2ufix64_z
577623
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
578624

579625
double_section double2fix
@@ -582,6 +628,7 @@ regular_func double2fix
582628

583629
double_section double2ufix
584630
regular_func double2ufix
631+
regular_func double2ufix_z
585632
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
586633

587634
double_wrapper_section __aeabi_d2f

src/rp2_common/pico_double/double_conv_m33.S

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,61 @@ regular_func ufix2double
249249
movs r1,#0
250250
bx r14
251251

252-
double_wrapper_section conv_dtoi64
252+
double_section conv_dtoi64
253+
regular_func double2int64
254+
lsls r2, r1, #1
255+
bcc double2int64_z // positive is ok for int64_z
256+
orrs r3, r2, r0
257+
beq double2int64_z // 0 or -0 is ok for int64_z
258+
259+
lsrs r2, #21
260+
adds r2, #1
261+
subs r2, r2, #0x400
262+
bcc 1f // <1 means subtract 1
263+
cmp r2, #52
264+
bge double2int64_z // must be an integer
265+
lsls r3, r1, #12
266+
adds r3, r3, r0, lsr #20
267+
// r3 now has highest 32 mantissa bits
268+
lsls r3, r2
269+
bne 1f // not integer as non zero fractional bits remain
270+
lsls r3, r0, #12
271+
lsls r3, r2
272+
beq double2int64_z // integer
273+
1:
274+
push {lr}
275+
bl double2int64_z
276+
subs r0, #1
277+
sbcs r1, r1, #0
278+
pop {pc}
279+
280+
double_section conv_dtofix64
281+
regular_func double2fix64
282+
lsls r3, r1, #1
283+
bcc double2fix64_z // positive is ok for fix64_z
284+
lsrs r3, #21
285+
adds r3, #1
286+
rsb ip, r2, #0x400
287+
subs r3, ip
288+
bcc 1f // <1 means subtract 1
289+
cmp r3, #52
290+
bge double2fix64_z // must be an integer
291+
lsls ip, r1, #12
292+
adds ip, ip, r0, lsr #20
293+
// ip now has highest 32 mantissa bits
294+
lsls ip, r3
295+
bne 1f // not integer as non zero fractional bits remain
296+
lsls ip, r0, #12
297+
lsls ip, r3
298+
beq double2fix64_z // integer
299+
1:
300+
push {lr}
301+
bl double2fix64_z
302+
subs r0, #1
303+
sbcs r1, r1, #0
304+
pop {pc}
305+
306+
double_wrapper_section conv_dtoi64_z
253307

254308
@ convert double to signed int64, rounding towards 0, clamping
255309
wrapper_func __aeabi_d2lz

src/rp2_common/pico_double/double_fma_dcp.S

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ wrapper_func fma
582582
saving_func_return
583583

584584

585-
double_wrapper_section __dmla
585+
double_section fma_fast
586586
@ cf saving_func macro: but here we need to record the SP before the state save possibly changes it
587587
1:
588588
push {lr} // 16-bit instruction
@@ -592,6 +592,7 @@ double_wrapper_section __dmla
592592
@ r0:r1 m
593593
@ r2:r3 n
594594
@ [r13,#0] a
595+
regular_func fma_fast
595596
regular_func mla
596597
mov r12,sp @ save the SP
597598
PCMP apsr_nzcv @ test the engaged flag

0 commit comments

Comments
 (0)