raspberrypi · kilograham · Feb 4, 2025 · Jan 23, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/src/rp2_common/hardware_dma/include/hardware/dma.h b/src/rp2_common/hardware_dma/include/hardware/dma.h
@@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) {
  *\endcode
  *
  * \if rp2350_specific
- * RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
+ * RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
  * the aborted channel and any chained channels prior to the abort to prevent re-triggering.
  * \endif
  *

diff --git a/src/rp2_common/pico_double/double_aeabi_dcp.S b/src/rp2_common/pico_double/double_aeabi_dcp.S
@@ -7,7 +7,7 @@
 #include "pico/asm_helper.S"
 
 #if !HAS_DOUBLE_COPROCESSOR
-#error attempt to compile double_aeabi_rp2350 when there is no DCP
+#error attempt to compile double_aeabi_dcp when there is no DCP
 #else
 
 #include "hardware/dcp_instr.inc.S"
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
 
 // ============== STATE SAVE AND RESTORE ===============
 
-.macro saving_func type func
+.macro saving_func type func, opt_label1='-', opt_label2='-'
   // Note we are usually 32-bit aligned already at this point, as most of the
   // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
   // We want the PCMP word-aligned.
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
   push {lr}              // 16-bit instruction
   bl generic_save_state  // 32-bit instruction
   b 1f                   // 16-bit instruction
+.ifnc \opt_label1,'-'
+regular_func \opt_label1
+.endif
+.ifnc \opt_label2,'-'
+regular_func \opt_label2
+.endif
   // This is the actual entry point:
 \type\()_func \func
   PCMP apsr_nzcv
@@ -128,53 +134,124 @@ saving_func wrapper sqrt
   dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
   saving_func_return
 
-// todo not a real thing
-double_wrapper_section __aeabi_dclassify
-saving_func wrapper __aeabi_dclassify
-@ with correct rounding
+double_section dclassify
+saving_func regular dclassify
   dcp_dclassify_m apsr_nzcv,r0,r1
   saving_func_return
 
 // ============== CONVERSION FUNCTIONS ===============
 
 double_wrapper_section __aeabi_d2f
-saving_func wrapper __aeabi_d2f
+saving_func wrapper __aeabi_d2f double2float
 @ with rounding
   dcp_double2float_m r0,r0,r1
   saving_func_return
 
 double_wrapper_section __aeabi_i2d
-saving_func wrapper __aeabi_i2d
+saving_func wrapper __aeabi_i2d int2double
   dcp_int2double_m r0,r1,r0
   saving_func_return
 
 double_wrapper_section __aeabi_ui2d
-saving_func wrapper __aeabi_ui2d
+saving_func wrapper __aeabi_ui2d uint2double
   dcp_uint2double_m r0,r1,r0
   saving_func_return
 
+double_section double2fix_z
+saving_func regular double2fix_z // r1 bits 20..30 are read as the exponent field; r2 is added to that field
+  ubfx r3, r1, #20, #11 // r3 = 11-bit field taken from r1 bits 20..30
+  adds r3, r2 // r3 = field + r2
+  beq 1f // very small; we don't care that we might make a denormal
+  asrs ip, r3, #11 // ip is zero iff the adjusted field lies in 0..0x7ff; flags feed the IT block below
+  beq 1f // in range: insert the adjusted field unchanged
+  ite pl // out of range: clamp according to the sign of the adjusted field
+  movpl r3, #0x7ff // too large: clamp to the maximum field value
+  movsmi r3, #0 // negative: clamp to zero
+1:
+  bfi r1, r3, #20, #11 // write the (possibly clamped) field back into r1 bits 20..30
+  b double2int_z_entry // continue in the conversion that truncates towards 0
+
+double_section double2ufix
+saving_func regular double2ufix_z double2ufix // double2ufix is emitted as an extra label at the same entry (opt_label1 of saving_func)
+double2ufix_z_entry:
+  ubfx r3, r1, #20, #11 // r3 = 11-bit exponent field taken from r1 bits 20..30
+  adds r3, r2 // r3 = field + adjustment supplied in r2
+  beq 1f // very small; we don't care that we might make a denormal
+  asrs ip, r3, #11 // ip is zero iff the adjusted field lies in 0..0x7ff; flags feed the IT block below
+  beq 1f // in range: insert the adjusted field unchanged
+  ite pl // out of range: clamp according to the sign of the adjusted field
+  movpl r3, #0x7ff // too large: clamp to the maximum field value, matching double2fix and double2fix_z
+  movsmi r3, #0 // negative: clamp to zero
+1:
+  bfi r1, r3, #20, #11 // write the (possibly clamped) field back into r1 bits 20..30
+  b double2uint_z_entry // continue in the unsigned conversion that truncates towards 0
+
+double_section double2fix
+saving_func regular double2fix // r1 bits 20..30 are read as the exponent field; r2 is added to that field
+  ubfx r3, r1, #20, #11 // r3 = 11-bit field taken from r1 bits 20..30
+  cbz r3, 2f // 0 or denormal
+  adds r3, r2 // r3 = field + r2
+  beq 1f // very small; we don't care that we might make a denormal
+  asrs ip, r3, #11 // ip is zero iff the adjusted field lies in 0..0x7ff; flags feed the IT block below
+  beq 1f // in range: insert the adjusted field unchanged
+  ite pl // out of range: clamp according to the sign of the adjusted field
+  movpl r3, #0x7ff // too large: clamp to the maximum field value
+  movsmi r3, #0 // negative: clamp to zero
+1:
+  bfi r1, r3, #20, #11 // write the (possibly clamped) field back into r1 bits 20..30
+  b double2int_entry // continue in double2int, which subtracts 1 for negative non-integers
+2:
+  movs r0, #0 // zero or denormal input: result is 0
+saving_func_return
+
+
+double_section double2int
+saving_func regular double2int
+double2int_entry:
+  lsls r2, r1, #1 // shift the sign bit of r1 out into carry; r2 = remaining bits
+  bcc double2int_z_entry // positive is ok for int_z
+  lsrs r3, r2, #21 // r3 = 11-bit exponent field
+  beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
+
+  lsrs r2, #21 // r2 = exponent field
+  adds r2, #1
+  subs r2, r2, #0x400 // r2 = exponent field - 0x3ff; carry clear when that is negative
+  bcc 1f // <1 means subtract 1
+  cmp r2, #31
+  bge double2int_z_entry // must be an integer or maxed out
+  lsls r3, r1, #12 // drop sign and exponent, leaving the top mantissa bits of r1
+  adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
+  lsls r3, r2 // shift out the bits that belong to the integer part
+  orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
+  beq double2int_z_entry // integer
+1:
+  dcp_double2int_m r0,r0,r1 // same conversion macro as double2int_z_entry (truncation towards 0)
+  subs r0, #1 // reached only for negative inputs with a non-zero fraction: truncated result minus 1
+saving_func_return
+
 double_wrapper_section __aeabi_d2iz
-saving_func wrapper __aeabi_d2iz
+saving_func wrapper __aeabi_d2iz double2int_z
+double2int_z_entry:
 @ with truncation towards 0
   dcp_double2int_m r0,r0,r1
+  // note: this works with either saved or not saved call as it is just a `bx lr`
   saving_func_return
 
 double_wrapper_section __aeabi_d2uiz
-saving_func wrapper __aeabi_d2uiz
+saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
+double2uint_z_entry:
 @ with truncation towards 0
   dcp_double2uint_m r0,r0,r1
   saving_func_return
 
-// todo not a real thing
-double_wrapper_section __aeabi_d2i_r
-saving_func wrapper __aeabi_d2i_r
+double_section double2int_r
+saving_func regular double2int_r
 @ with rounding
   dcp_double2int_r_m r0,r0,r1
   saving_func_return
 
-// todo not a real thing
-double_wrapper_section __aeabi_d2ui_r
-saving_func wrapper __aeabi_d2ui_r
+double_section double2uint_r
+saving_func regular double2uint_r
 @ with rounding
   dcp_double2uint_r_m r0,r0,r1
   saving_func_return
@@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
   saving_func_return
 
 double_wrapper_section __aeabi_dcmp
-
 saving_func wrapper __aeabi_cdrcmple
   dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
   bvs cmp_nan

diff --git a/src/rp2_common/pico_double/double_aeabi_rp2040.S b/src/rp2_common/pico_double/double_aeabi_rp2040.S
@@ -425,13 +425,15 @@ double_wrapper_section __aeabi_ui2d
 double_wrapper_section __aeabi_i2d
 
 wrapper_func __aeabi_ui2d
+regular_func uint2double
     movs r1, #0
     cmp r0, #0
     bne 2f
 1:
     bx lr
 // double FUNC_NAME(__aeabi_i2d)(int)                     integer to double (double precision) conversion
 wrapper_func __aeabi_i2d
+regular_func int2double
     asrs r1, r0, #31
     eors r0, r1
     subs r0, r1
@@ -506,6 +508,7 @@ regular_func double2int
 // unsigned FUNC_NAME(__aeabi_d2uiz)(double)             double (double precision) to unsigned C-style conversion [3]
 double_wrapper_section __aeabi_d2uiz
 wrapper_func __aeabi_d2uiz
+regular_func double2uint_z
 regular_func double2uint
     shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
 
@@ -528,11 +531,13 @@ regular_func ufix642double
 // double FUNC_NAME(__aeabi_l2d)(long long)             long long to double (double precision) conversion
 double_wrapper_section __aeabi_l2d
 wrapper_func __aeabi_l2d
+regular_func int642double
     shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
 
 // double FUNC_NAME(__aeabi_l2f)(long long)             long long to double (double precision) conversion
 double_wrapper_section __aeabi_ul2d
 wrapper_func __aeabi_ul2d
+regular_func uint642double
     shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
 
 // long long FUNC_NAME(__aeabi_d2lz)(double)             double (double precision) to long long C-style conversion [3]
@@ -566,22 +571,106 @@ regular_func double2int64
 // unsigned long long FUNC_NAME(__aeabi_d2ulz)(double)     double to unsigned long long C-style conversion [3]
 double_wrapper_section __aeabi_d2ulz
 wrapper_func __aeabi_d2ulz
+regular_func double2uint64
+regular_func double2uint64_z
     shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
 
+double_section double2fix64_z
+regular_func double2fix64_z
+  lsls r3, r1, #1 // carry = sign bit of r1; r3 = r1 << 1
+  bcc double2fix64 // input positive is ok for fix64
+  mov ip, r2 // keep the caller's r2 in ip; r2 is used as scratch below
+  asrs r2, r3, #21 // r2 = sign-extended exponent field: 0 if the field is 0, -1 if it is all ones
+  beq 3f           // input zero or denormal, so just return zero
+  adds r2, #1 // r2 becomes 0 only when the exponent field was all ones
+  beq double2fix64 // input infinite/nan is ok for fix64 (NOTE(review): r2 is 0 here, not the caller's value saved in ip - confirm this is intended)
+
+  lsrs r3, #21 // r3 = exponent field, zero-extended
+  add r3, ip // add the caller's r2 to the exponent
+  movs r2, #1
+  negs r2, r2 // r2 = 0xffffffff
+  lsrs r2, #22 // r2 = 0x3ff
+  subs r3, r2 // r3 = modified e - 0x3ff
+
+  bcc 3f // modified input < 1.0 means result is zero
+  cmp r3, #52
+  bge 2f // modified input must be an integer or infinite
+
+  adds r3, #12 // shift count = 12 (sign and exponent bits) + unbiased exponent
+  mov r2, r1
+  lsls r2, r2, r3    // r2 has remaining fractional mantissa bits of r1
+  bne 1f             // not integer as non zero fractional bits remain
+  subs r3, #32 // shift count that remains for the low word
+  asrs r2, r3, #31 // r2 = all ones if r3 is negative, else 0
+  bics r3, r3, r2 // clamp the shift count to be >= 0
+  movs r2, r0
+  lsls r2, r2, r3 // r2 = fraction bits left in the low word
+  bne 1f             // remaining fractional bits are non-zero, so argument was not an integer
+2:
+  // integer
+  mov r2, ip // restore the caller's r2 before the tail call
+  b double2fix64
+3: // result is zero
+  movs r0, #0
+  movs r1, #0
+  bx lr
+1:
+  push {lr} // reached only for negative inputs with non-zero fraction bits
+  mov r2, ip // restore the caller's r2 before the call
+  bl double2fix64
+  movs r2, #0
+  adds r0, #1 // add 1 to the 64-bit result in r1:r0 ...
+  adcs r1, r2 // ... propagating the carry into the high word
+  pop {pc}
+
 double_section double2fix64
 regular_func double2fix64
     shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
 
 double_section double2ufix64
 regular_func double2ufix64
+regular_func double2ufix64_z
     shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
 
 double_section double2fix
 regular_func double2fix
     shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
 
+double_section double2fix_z
+regular_func double2fix_z
+  lsls r3, r1, #1 // drop the sign bit of r1
+  asrs r3, #21 // r3 = sign-extended exponent field: 0 if the field is 0, -1 if it is all ones
+  beq 2f // input is zero or denormal
+  adds r3, #1 // r3 becomes 0 only when the exponent field was all ones
+  beq 3f // input is infinite or nan
+
+  // extract exponent again
+  lsls r3, r1, #1
+  lsrs r3, #21 // this time zero-extended
+  // adjust
+  adds r3, r2
+  ble 2f // adjusted input is zero or denormal or < 1
+  lsrs r3, r3, #11
+  bne 3f // adjusted exponent no longer fits in 11 bits, so saturate
+
+  lsls r2, r2, #20 // align exponent adjustment offset
+  adds r1, r1, r2  // we know adjustment is safe
+  b double2int_z
+2:
+  // result is zero
+  movs r0, #0
+  bx lr
+3:
+  movs r0, #0
+  subs r0, #1 // r0 = 0xffffffff
+  lsrs r0, #1 // r0 = 0x7fffffff
+  asrs r1, #31 // r1 = 0 for a positive input, 0xffffffff for a negative one
+  eors r0, r1 // r0 = 0x7fffffff (positive) or 0x80000000 (negative)
+  bx lr
+
 double_section double2ufix
 regular_func double2ufix
+regular_func double2ufix_z
     shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
 
 double_wrapper_section __aeabi_d2f