Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/rp2_common/hardware_dma/include/hardware/dma.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) {
*\endcode
*
* \if rp2350_specific
* RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
* RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
* the aborted channel and any chained channels prior to the abort to prevent re-triggering.
* \endif
*
Expand Down
112 changes: 94 additions & 18 deletions src/rp2_common/pico_double/double_aeabi_dcp.S
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "pico/asm_helper.S"

#if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile double_aeabi_rp2350 when there is no DCP
#error attempt to compile double_aeabi_dcp when there is no DCP
#else

#include "hardware/dcp_instr.inc.S"
Expand All @@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)

// ============== STATE SAVE AND RESTORE ===============

.macro saving_func type func
.macro saving_func type func, opt_label1='-', opt_label2='-'
// Note we are usually 32-bit aligned already at this point, as most of the
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
// We want the PCMP word-aligned.
Expand All @@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
push {lr} // 16-bit instruction
bl generic_save_state // 32-bit instruction
b 1f // 16-bit instruction
.ifnc \opt_label1,'-'
regular_func \opt_label1
.endif
.ifnc \opt_label2,'-'
regular_func \opt_label2
.endif
// This is the actual entry point:
\type\()_func \func
PCMP apsr_nzcv
Expand Down Expand Up @@ -128,53 +134,124 @@ saving_func wrapper sqrt
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
saving_func_return

// todo not a real thing
double_wrapper_section __aeabi_dclassify
saving_func wrapper __aeabi_dclassify
@ with correct rounding
double_section dclassify
saving_func regular dclassify
dcp_dclassify_m apsr_nzcv,r0,r1
saving_func_return

// ============== CONVERSION FUNCTIONS ===============

double_wrapper_section __aeabi_d2f
saving_func wrapper __aeabi_d2f
saving_func wrapper __aeabi_d2f double2float
@ with rounding
dcp_double2float_m r0,r0,r1
saving_func_return

double_wrapper_section __aeabi_i2d
saving_func wrapper __aeabi_i2d
saving_func wrapper __aeabi_i2d int2double
dcp_int2double_m r0,r1,r0
saving_func_return

double_wrapper_section __aeabi_ui2d
saving_func wrapper __aeabi_ui2d
saving_func wrapper __aeabi_ui2d uint2double
dcp_uint2double_m r0,r1,r0
saving_func_return

// double2fix_z: double -> signed 32-bit fixed point.
// In:  r1 = high word of the double (sign, 11-bit exponent in bits 30:20, top mantissa bits),
//      r0 = low word, r2 = value added to the exponent field (i.e. the input is scaled by 2^r2).
// The exponent field of r1 is rewritten with the adjusted (and clamped) exponent, then control
// transfers to double2int_z_entry, which performs the conversion with truncation towards 0.
double_section double2fix_z
saving_func regular double2fix_z
ubfx r3, r1, #20, #11 // r3 = biased exponent field
adds r3, r2 // r3 = exponent + r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11 // ip = 0 iff the adjusted exponent still fits in 11 bits; N set if it went negative
beq 1f // in range: use the adjusted exponent as is
ite pl
movpl r3, #0x7ff // too large: clamp to the maximum exponent field value
movsmi r3, #0 // negative: clamp to zero exponent
1:
bfi r1, r3, #20, #11 // write the (clamped) exponent back into the high word
b double2int_z_entry

// double2ufix_z / double2ufix: double -> unsigned 32-bit fixed point.
// Both names are emitted for the same entry point: saving_func receives double2ufix as its
// optional extra label.
// In:  r1 = high word of the double, r0 = low word, r2 = value added to the exponent field
//      (i.e. the input is scaled by 2^r2).
// The exponent field of r1 is rewritten, then control transfers to double2uint_z_entry.
double_section double2ufix
saving_func regular double2ufix_z double2ufix
double2ufix_z_entry:
ubfx r3, r1, #20, #11 // r3 = biased exponent field
adds r3, r2 // r3 = exponent + r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11 // ip = 0 iff the adjusted exponent still fits in 11 bits; N set if it went negative
beq 1f // in range: use the adjusted exponent as is
ite pl
// NOTE(review): the original comment on the next line says "0x7ff", but the instruction loads
// r1 >> 20 (sign bit plus the ORIGINAL exponent field); the bfi below keeps only the low 11 bits,
// i.e. the original exponent, not 0x7ff. Presumably sufficient to saturate for realistic r2 -
// confirm this is intended.
lsrspl r3, r1, #20 // 0x7ff
movsmi r3, #0 // negative: clamp to zero exponent
1:
bfi r1, r3, #20, #11 // write the exponent back into the high word
b double2uint_z_entry

// double2fix: double -> signed 32-bit fixed point, finishing in the double2int code path.
// In:  r1 = high word of the double, r0 = low word, r2 = value added to the exponent field
//      (i.e. the input is scaled by 2^r2).
// An input whose exponent field is zero (zero or denormal) returns 0 immediately; otherwise the
// exponent field of r1 is rewritten with the adjusted (and clamped) exponent and control
// transfers to double2int_entry.
double_section double2fix
saving_func regular double2fix
ubfx r3, r1, #20, #11 // r3 = biased exponent field
cbz r3, 2f // 0 or denormal
adds r3, r2 // r3 = exponent + r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11 // ip = 0 iff the adjusted exponent still fits in 11 bits; N set if it went negative
beq 1f // in range: use the adjusted exponent as is
ite pl
movpl r3, #0x7ff // too large: clamp to the maximum exponent field value
movsmi r3, #0 // negative: clamp to zero exponent
1:
bfi r1, r3, #20, #11 // write the (clamped) exponent back into the high word
b double2int_entry
2:
movs r0, #0 // zero/denormal input: result is 0
saving_func_return


// double2int: double (r1 = high word, r0 = low word) -> signed 32-bit integer in r0.
// Positive inputs, inputs with a zero exponent field, integers and large magnitudes are handed
// to double2int_z_entry (truncation towards 0). Negative inputs with a non-zero fractional part
// are converted with dcp_double2int_m and then decremented by one.
double_section double2int
saving_func regular double2int
double2int_entry:
lsls r2, r1, #1 // shift the sign bit out into carry; r2 = exponent:mantissa-high << 1
bcc double2int_z_entry // positive input: the truncating conversion can be used directly
lsrs r3, r2, #21 // r3 = biased exponent field
beq double2int_z_entry // 0 or -0 or denormal is ok for int_z

lsrs r2, #21 // r2 = biased exponent field
adds r2, #1
subs r2, r2, #0x400 // r2 = exponent - 0x3ff; carry clear if exponent < 0x3ff
bcc 1f // <1 means subtract 1
cmp r2, #31
bge double2int_z_entry // must be an integer or maxed out
lsls r3, r1, #12 // discard sign and exponent, leaving the top 20 mantissa bits at the top of r3
adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
lsls r3, r2 // shift out the mantissa bits that belong to the integer part
orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
beq double2int_z_entry // integer
1:
dcp_double2int_m r0,r0,r1
subs r0, #1 // negative non-integer: step the converted result down by one
saving_func_return

double_wrapper_section __aeabi_d2iz
saving_func wrapper __aeabi_d2iz
saving_func wrapper __aeabi_d2iz double2int_z
double2int_z_entry:
@ with truncation towards 0
dcp_double2int_m r0,r0,r1
// note: this works with either saved or not saved call as it is just a `bx lr`
saving_func_return

double_wrapper_section __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
double2uint_z_entry:
@ with truncation towards 0
dcp_double2uint_m r0,r0,r1
saving_func_return

// todo not a real thing
double_wrapper_section __aeabi_d2i_r
saving_func wrapper __aeabi_d2i_r
double_section double2int_r
saving_func regular double2int_r
@ with rounding
dcp_double2int_r_m r0,r0,r1
saving_func_return

// todo not a real thing
double_wrapper_section __aeabi_d2ui_r
saving_func wrapper __aeabi_d2ui_r
double_section double2uint_r
saving_func regular double2uint_r
@ with rounding
dcp_double2uint_r_m r0,r0,r1
saving_func_return
Expand All @@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
saving_func_return

double_wrapper_section __aeabi_dcmp

saving_func wrapper __aeabi_cdrcmple
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
bvs cmp_nan
Expand Down
89 changes: 89 additions & 0 deletions src/rp2_common/pico_double/double_aeabi_rp2040.S
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,15 @@ double_wrapper_section __aeabi_ui2d
double_wrapper_section __aeabi_i2d

wrapper_func __aeabi_ui2d
regular_func uint2double
movs r1, #0
cmp r0, #0
bne 2f
1:
bx lr
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
wrapper_func __aeabi_i2d
regular_func int2double
asrs r1, r0, #31
eors r0, r1
subs r0, r1
Expand Down Expand Up @@ -506,6 +508,7 @@ regular_func double2int
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
// __aeabi_d2uiz, double2uint_z and double2uint all label the same entry point; the body is a
// single shimmable_table_tail_call using table entry SF_TABLE_FLOAT2UINT.
// NOTE(review): shimmable_table_tail_call is defined outside this view - confirm its semantics there.
double_wrapper_section __aeabi_d2uiz
wrapper_func __aeabi_d2uiz
regular_func double2uint_z
regular_func double2uint
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim

Expand All @@ -528,11 +531,13 @@ regular_func ufix642double
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
// __aeabi_l2d and int642double label the same entry point; the body is a single
// shimmable_table_tail_call using table entry SF_TABLE_INT642FLOAT.
double_wrapper_section __aeabi_l2d
wrapper_func __aeabi_l2d
regular_func int642double
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim

// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
// __aeabi_ul2d and uint642double label the same entry point; the body is a single
// shimmable_table_tail_call using table entry SF_TABLE_UINT642FLOAT.
double_wrapper_section __aeabi_ul2d
wrapper_func __aeabi_ul2d
regular_func uint642double
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim

// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
Expand Down Expand Up @@ -566,22 +571,106 @@ regular_func double2int64
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
// __aeabi_d2ulz, double2uint64 and double2uint64_z all label the same entry point; the body is a
// single shimmable_table_tail_call using table entry SF_TABLE_FLOAT2UINT64.
double_wrapper_section __aeabi_d2ulz
wrapper_func __aeabi_d2ulz
regular_func double2uint64
regular_func double2uint64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim

// double2fix64_z: double -> signed 64-bit fixed point, built on top of double2fix64.
// In:  r1 = high word of the double (sign in bit 31, exponent in bits 30:20), r0 = low word,
//      r2 = exponent adjustment (the input is scaled by 2^r2).
// Out: r1:r0 = result.
// Positive inputs, inf/nan and (scaled) integers are passed straight to double2fix64.
// Negative inputs whose scaled magnitude is below 1.0 return 0. Negative non-integers call
// double2fix64 and add 1 to the 64-bit result (presumably because double2fix64 rounds towards
// minus infinity - confirm against its implementation).
double_section double2fix64_z
regular_func double2fix64_z
lsls r3, r1, #1 // shift the sign bit out into carry; r3 = exponent:mantissa-high << 1
bcc double2fix64 // input positive is ok for fix64
mov ip, r2 // keep the caller's r2 in ip; r2 is used as scratch below
asrs r2, r3, #21 // r2 = sign-extended exponent field: 0 if exponent is 0, -1 if exponent is 0x7ff
beq 3f // input zero or denormal, so just return zero
adds r2, #1
// NOTE(review): r2 is 0 here rather than the caller's value (which is still in ip) when this
// branch is taken - presumably harmless for inf/nan, confirm against double2fix64.
beq double2fix64 // input infinite/nan is ok for fix64

lsrs r3, #21 // r3 = biased exponent field
add r3, ip // r3 = exponent + caller's r2
movs r2, #1
negs r2, r2
lsrs r2, #22 // r2 = 0xffffffff >> 22 = 0x3ff
subs r3, r2 // r3 = modified e - 0x3ff

bcc 3f // modified input < 1.0 means result is zero
cmp r3, #52
bge 2f // modified input must be an integer or infinite

adds r3, #12 // also skip the 12 sign/exponent bits at the top of r1
mov r2, r1
lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1
bne 1f // not integer as non zero fractional bits remain
subs r3, #32 // shift count relative to the low word
asrs r2, r3, #31 // r2 = all ones if that count is negative, else 0
bics r3, r3, r2 // clamp the count to be >= 0
movs r2, r0
lsls r2, r2, r3 // r2 = fractional bits remaining in the low word
bne 1f // remaining fractional bits are non-zero, so argument was not an integer
2:
// integer
mov r2, ip // restore the caller's r2 before the tail call
b double2fix64
3: // result is zero
movs r0, #0
movs r1, #0
bx lr
1:
// negative non-integer: convert with double2fix64, then add one to the 64-bit result
push {lr}
mov r2, ip // restore the caller's r2 for the call
bl double2fix64
movs r2, #0
adds r0, #1
adcs r1, r2 // propagate the carry into the high word
pop {pc}

// double2fix64: the body is a single shimmable_table_tail_call using table entry SF_TABLE_FLOAT2FIX64.
// NOTE(review): shimmable_table_tail_call is defined outside this view - confirm its semantics there.
double_section double2fix64
regular_func double2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim

// double2ufix64 and double2ufix64_z label the same entry point; the body is a single
// shimmable_table_tail_call using table entry SF_TABLE_FLOAT2UFIX64.
double_section double2ufix64
regular_func double2ufix64
regular_func double2ufix64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim

// double2fix: the body is a single shimmable_table_tail_call using table entry SF_TABLE_FLOAT2FIX.
double_section double2fix
regular_func double2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim

// double2fix_z: double -> signed 32-bit fixed point, finishing in double2int_z.
// In:  r1 = high word of the double (sign in bit 31, exponent in bits 30:20), r0 = low word,
//      r2 = exponent adjustment (the input is scaled by 2^r2).
// Out: r0 = result.
// Zero/denormal inputs, and inputs whose adjusted exponent is <= 0, return 0.
// Inf/nan inputs, and inputs whose adjusted exponent no longer fits in 11 bits, return
// 0x7fffffff (sign bit clear) or 0x80000000 (sign bit set).
// Otherwise r2 is added directly into the exponent field of r1 and double2int_z is tail-called.
double_section double2fix_z
regular_func double2fix_z
lsls r3, r1, #1 // drop the sign bit
asrs r3, #21 // r3 = sign-extended exponent field: 0 if exponent is 0, -1 if exponent is 0x7ff
beq 2f // input is zero or denormal
adds r3, #1
beq 3f // input is infinite or nan

// extract exponent again
lsls r3, r1, #1
lsrs r3, #21 // r3 = biased exponent field (zero-extended)
// adjust
adds r3, r2
ble 2f // adjusted exponent is <= 0: adjusted input is zero or denormal
lsrs r3, r3, #11
bne 3f // adjusted exponent does not fit in 11 bits: overflow, saturate

lsls r2, r2, #20 // align exponent adjustment offset
adds r1, r1, r2 // we know adjustment is safe
b double2int_z
2:
// result is zero
movs r0, #0
bx lr
3:
// saturated result: 0x7fffffff if the sign bit is clear, 0x80000000 if it is set
movs r0, #0
subs r0, #1 // r0 = 0xffffffff
lsrs r0, #1 // r0 = 0x7fffffff
asrs r1, #31 // r1 = 0 or 0xffffffff according to the sign bit
eors r0, r1
bx lr

// double2ufix and double2ufix_z label the same entry point; the body is a single
// shimmable_table_tail_call using table entry SF_TABLE_FLOAT2UFIX.
double_section double2ufix
regular_func double2ufix
regular_func double2ufix_z
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim

double_wrapper_section __aeabi_d2f
Expand Down
Loading
Loading