Skip to content

Commit e85c3e5

Browse files
authored
rationalize pico_float/pico_double libraries (#2208)
* on RP2350 the _dcp variant now enables -msoft-float, since if you're using it at all it is likely because you don't want to use the VFP unit (to save stack space)
* implement all float_ and double_ conversion functions in all pico_float_pico_ variants and pico_double_pico on RP2040 and RP2350 (many were missing in some combinations)
* provide better granularity of what functions are wrapped in each case

also marked custom_xxx_funcs_test.c as not in bazel build yet
1 parent 7d450bf commit e85c3e5

File tree

17 files changed

+2011
-141
lines changed

17 files changed

+2011
-141
lines changed

src/rp2_common/hardware_dma/include/hardware/dma.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) {
535535
*\endcode
536536
*
537537
* \if rp2350_specific
538-
* RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
538+
* RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
539539
* the aborted channel and any chained channels prior to the abort to prevent re-triggering.
540540
* \endif
541541
*

src/rp2_common/pico_double/double_aeabi_dcp.S

Lines changed: 94 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "pico/asm_helper.S"
88

99
#if !HAS_DOUBLE_COPROCESSOR
10-
#error attempt to compile double_aeabi_rp2350 when there is no DCP
10+
#error attempt to compile double_aeabi_dcp when there is no DCP
1111
#else
1212

1313
#include "hardware/dcp_instr.inc.S"
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
2929

3030
// ============== STATE SAVE AND RESTORE ===============
3131

32-
.macro saving_func type func
32+
.macro saving_func type func, opt_label1='-', opt_label2='-'
3333
// Note we are usually 32-bit aligned already at this point, as most of the
3434
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
3535
// We want the PCMP word-aligned.
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
4141
push {lr} // 16-bit instruction
4242
bl generic_save_state // 32-bit instruction
4343
b 1f // 16-bit instruction
44+
.ifnc \opt_label1,'-'
45+
regular_func \opt_label1
46+
.endif
47+
.ifnc \opt_label2,'-'
48+
regular_func \opt_label2
49+
.endif
4450
// This is the actual entry point:
4551
\type\()_func \func
4652
PCMP apsr_nzcv
@@ -128,53 +134,124 @@ saving_func wrapper sqrt
128134
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
129135
saving_func_return
130136

131-
// todo not a real thing
132-
double_wrapper_section __aeabi_dclassify
133-
saving_func wrapper __aeabi_dclassify
134-
@ with correct rounding
137+
double_section dclassify
138+
saving_func regular dclassify
135139
dcp_dclassify_m apsr_nzcv,r0,r1
136140
saving_func_return
137141

138142
// ============== CONVERSION FUNCTIONS ===============
139143

140144
double_wrapper_section __aeabi_d2f
141-
saving_func wrapper __aeabi_d2f
145+
saving_func wrapper __aeabi_d2f double2float
142146
@ with rounding
143147
dcp_double2float_m r0,r0,r1
144148
saving_func_return
145149

146150
double_wrapper_section __aeabi_i2d
147-
saving_func wrapper __aeabi_i2d
151+
saving_func wrapper __aeabi_i2d int2double
148152
dcp_int2double_m r0,r1,r0
149153
saving_func_return
150154

151155
double_wrapper_section __aeabi_ui2d
152-
saving_func wrapper __aeabi_ui2d
156+
saving_func wrapper __aeabi_ui2d uint2double
153157
dcp_uint2double_m r0,r1,r0
154158
saving_func_return
155159

160+
double_section double2fix_z
161+
saving_func regular double2fix_z
162+
ubfx r3, r1, #20, #11
163+
adds r3, r2
164+
beq 1f // very small; we don't care that we might make a denormal
165+
asrs ip, r3, #11
166+
beq 1f
167+
ite pl
168+
movpl r3, #0x7ff
169+
movsmi r3, #0
170+
1:
171+
bfi r1, r3, #20, #11
172+
b double2int_z_entry
173+
174+
double_section double2ufix
175+
saving_func regular double2ufix_z double2ufix
176+
double2ufix_z_entry:
177+
ubfx r3, r1, #20, #11
178+
adds r3, r2
179+
beq 1f // very small; we don't care that we might make a denormal
180+
asrs ip, r3, #11
181+
beq 1f
182+
ite pl
183+
lsrspl r3, r1, #20 // 0x7ff
184+
movsmi r3, #0
185+
1:
186+
bfi r1, r3, #20, #11
187+
b double2uint_z_entry
188+
189+
double_section double2fix
190+
saving_func regular double2fix
191+
ubfx r3, r1, #20, #11
192+
cbz r3, 2f // 0 or denormal
193+
adds r3, r2
194+
beq 1f // very small; we don't care that we might make a denormal
195+
asrs ip, r3, #11
196+
beq 1f
197+
ite pl
198+
movpl r3, #0x7ff
199+
movsmi r3, #0
200+
1:
201+
bfi r1, r3, #20, #11
202+
b double2int_entry
203+
2:
204+
movs r0, #0
205+
saving_func_return
206+
207+
208+
double_section double2int
209+
saving_func regular double2int
210+
double2int_entry:
211+
lsls r2, r1, #1
212+
bcc double2int_z_entry // positive is ok for int_z
213+
lsrs r3, r2, #21
214+
beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
215+
216+
lsrs r2, #21
217+
adds r2, #1
218+
subs r2, r2, #0x400
219+
bcc 1f // <1 means subtract 1
220+
cmp r2, #31
221+
bge double2int_z_entry // must be an integer or maxed out
222+
lsls r3, r1, #12
223+
adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
224+
lsls r3, r2
225+
orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
226+
beq double2int_z_entry // integer
227+
1:
228+
dcp_double2int_m r0,r0,r1
229+
subs r0, #1
230+
saving_func_return
231+
156232
double_wrapper_section __aeabi_d2iz
157-
saving_func wrapper __aeabi_d2iz
233+
saving_func wrapper __aeabi_d2iz double2int_z
234+
double2int_z_entry:
158235
@ with truncation towards 0
159236
dcp_double2int_m r0,r0,r1
237+
// note: this works with either saved or not saved call as it is just a `bx lr`
160238
saving_func_return
161239

162240
double_wrapper_section __aeabi_d2uiz
163-
saving_func wrapper __aeabi_d2uiz
241+
saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
242+
double2uint_z_entry:
164243
@ with truncation towards 0
165244
dcp_double2uint_m r0,r0,r1
166245
saving_func_return
167246

168-
// todo not a real thing
169-
double_wrapper_section __aeabi_d2i_r
170-
saving_func wrapper __aeabi_d2i_r
247+
double_section double2int_r
248+
saving_func regular double2int_r
171249
@ with rounding
172250
dcp_double2int_r_m r0,r0,r1
173251
saving_func_return
174252

175-
// todo not a real thing
176-
double_wrapper_section __aeabi_d2ui_r
177-
saving_func wrapper __aeabi_d2ui_r
253+
double_section double2uint_r
254+
saving_func regular double2uint_r
178255
@ with rounding
179256
dcp_double2uint_r_m r0,r0,r1
180257
saving_func_return
@@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
189266
saving_func_return
190267

191268
double_wrapper_section __aeabi_dcmp
192-
193269
saving_func wrapper __aeabi_cdrcmple
194270
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
195271
bvs cmp_nan

src/rp2_common/pico_double/double_aeabi_rp2040.S

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,13 +425,15 @@ double_wrapper_section __aeabi_ui2d
425425
double_wrapper_section __aeabi_i2d
426426

427427
wrapper_func __aeabi_ui2d
428+
regular_func uint2double
428429
movs r1, #0
429430
cmp r0, #0
430431
bne 2f
431432
1:
432433
bx lr
433434
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
434435
wrapper_func __aeabi_i2d
436+
regular_func int2double
435437
asrs r1, r0, #31
436438
eors r0, r1
437439
subs r0, r1
@@ -506,6 +508,7 @@ regular_func double2int
506508
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
507509
double_wrapper_section __aeabi_d2uiz
508510
wrapper_func __aeabi_d2uiz
511+
regular_func double2uint_z
509512
regular_func double2uint
510513
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
511514

@@ -528,11 +531,13 @@ regular_func ufix642double
528531
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
529532
double_wrapper_section __aeabi_l2d
530533
wrapper_func __aeabi_l2d
534+
regular_func int642double
531535
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
532536

533537
// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
534538
double_wrapper_section __aeabi_ul2d
535539
wrapper_func __aeabi_ul2d
540+
regular_func uint642double
536541
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
537542

538543
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
@@ -566,22 +571,106 @@ regular_func double2int64
566571
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
567572
double_wrapper_section __aeabi_d2ulz
568573
wrapper_func __aeabi_d2ulz
574+
regular_func double2uint64
575+
regular_func double2uint64_z
569576
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
570577

578+
double_section double2fix64_z
579+
regular_func double2fix64_z
580+
lsls r3, r1, #1
581+
bcc double2fix64 // input positive is ok for fix64
582+
mov ip, r2
583+
asrs r2, r3, #21
584+
beq 3f // input zero or denormal, so just return zero
585+
adds r2, #1
586+
beq double2fix64 // input infinite/nan is ok for fix64
587+
588+
lsrs r3, #21
589+
add r3, ip
590+
movs r2, #1
591+
negs r2, r2
592+
lsrs r2, #22
593+
subs r3, r2 // r3 = modified e - 0x3ff
594+
595+
bcc 3f // modified input < 1.0 means result is zero
596+
cmp r3, #52
597+
bge 2f // modified input must be an integer or infinite
598+
599+
adds r3, #12
600+
mov r2, r1
601+
lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1
602+
bne 1f // not integer as non zero fractional bits remain
603+
subs r3, #32
604+
asrs r2, r3, #31
605+
bics r3, r3, r2
606+
movs r2, r0
607+
lsls r2, r2, r3
608+
bne 1f // remaining fractional bits are non-zero, so argument was not an integer
609+
2:
610+
// integer
611+
mov r2, ip
612+
b double2fix64
613+
3: // result is zero
614+
movs r0, #0
615+
movs r1, #0
616+
bx lr
617+
1:
618+
push {lr}
619+
mov r2, ip
620+
bl double2fix64
621+
movs r2, #0
622+
adds r0, #1
623+
adcs r1, r2
624+
pop {pc}
625+
571626
double_section double2fix64
572627
regular_func double2fix64
573628
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
574629

575630
double_section double2ufix64
576631
regular_func double2ufix64
632+
regular_func double2ufix64_z
577633
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
578634

579635
double_section double2fix
580636
regular_func double2fix
581637
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
582638

639+
double_section double2fix_z
640+
regular_func double2fix_z
641+
lsls r3, r1, #1
642+
asrs r3, #21
643+
beq 2f // input is zero or denormal
644+
adds r3, #1
645+
beq 3f // input is infinite or nan
646+
647+
// extract exponent again
648+
lsls r3, r1, #1
649+
lsrs r3, #21
650+
// adjust
651+
adds r3, r2
652+
ble 2f // adjusted input is zero or denormal (adjusted exponent < 1)
653+
lsrs r3, r3, #11
654+
bne 3f // adjusted exponent no longer fits in 11 bits (beyond infinity), so saturate
655+
656+
lsls r2, r2, #20 // align exponent adjustment offset
657+
adds r1, r1, r2 // we know adjustment is safe
658+
b double2int_z
659+
2:
660+
// result is zero
661+
movs r0, #0
662+
bx lr
663+
3:
664+
movs r0, #0
665+
subs r0, #1
666+
lsrs r0, #1
667+
asrs r1, #31
668+
eors r0, r1
669+
bx lr
670+
583671
double_section double2ufix
584672
regular_func double2ufix
673+
regular_func double2ufix_z
585674
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
586675

587676
double_wrapper_section __aeabi_d2f

0 commit comments

Comments
 (0)