raspberrypi
diff --git a/‎src/rp2_common/pico_double/double_aeabi_dcp.S‎
Lines changed: 25 additions & 26 deletions b/‎src/rp2_common/pico_double/double_aeabi_dcp.S‎
Lines changed: 25 additions & 26 deletions
diff --git a/‎src/rp2_common/pico_double/double_aeabi_rp2040.S‎
Lines changed: 67 additions & 19 deletions b/‎src/rp2_common/pico_double/double_aeabi_rp2040.S‎
Lines changed: 67 additions & 19 deletions
diff --git a/‎src/rp2_common/pico_double/double_conv_m33.S‎
Lines changed: 41 additions & 33 deletions b/‎src/rp2_common/pico_double/double_conv_m33.S‎
Lines changed: 41 additions & 33 deletions
@@ -7,7 +7,7 @@
 #include "pico/asm_helper.S"
 
 #if !HAS_DOUBLE_COPROCESSOR
-#error attempt to compile double_aeabi_rp2350 when there is no DCP
+#error attempt to compile double_aeabi_dcp when there is no DCP
 #else
 
 #include "hardware/dcp_instr.inc.S"
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
 
 // ============== STATE SAVE AND RESTORE ===============
 
-.macro saving_func type func
+.macro saving_func type func, opt_label1='-', opt_label2='-'
   // Note we are usually 32-bit aligned already at this point, as most of the
   // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
   // We want the PCMP word-aligned.
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
   push {lr}              // 16-bit instruction
   bl generic_save_state  // 32-bit instruction
   b 1f                   // 16-bit instruction
+.ifnc \opt_label1,'-'
+regular_func \opt_label1
+.endif
+.ifnc \opt_label2,'-'
+regular_func \opt_label2
+.endif
   // This is the actual entry point:
 \type\()_func \func
   PCMP apsr_nzcv
@@ -130,28 +136,24 @@ saving_func wrapper sqrt
 
 double_section dclassify
 saving_func regular dclassify
-@ with correct rounding
   dcp_dclassify_m apsr_nzcv,r0,r1
   saving_func_return
 
 // ============== CONVERSION FUNCTIONS ===============
 
 double_wrapper_section __aeabi_d2f
-regular_func double2float
-saving_func wrapper __aeabi_d2f
+saving_func wrapper __aeabi_d2f double2float
 @ with rounding
   dcp_double2float_m r0,r0,r1
   saving_func_return
 
 double_wrapper_section __aeabi_i2d
-regular_func int2double
-saving_func wrapper __aeabi_i2d
+saving_func wrapper __aeabi_i2d int2double
   dcp_int2double_m r0,r1,r0
   saving_func_return
 
 double_wrapper_section __aeabi_ui2d
-regular_func uint2double
-saving_func wrapper __aeabi_ui2d
+saving_func wrapper __aeabi_ui2d uint2double
   dcp_uint2double_m r0,r1,r0
   saving_func_return
 
@@ -170,8 +172,7 @@ saving_func regular double2fix_z
   b double2int_z_entry
 
 double_section double2ufix
-regular_func double2ufix
-saving_func regular double2ufix_z
+saving_func regular double2ufix_z double2ufix
 double2ufix_z_entry:
   ubfx r3, r1, #20, #11
   adds r3, r2
@@ -188,6 +189,7 @@ double2ufix_z_entry:
 double_section double2fix
 saving_func regular double2fix
   ubfx r3, r1, #20, #11
+  cbz r3, 2f // 0 or denormal
   adds r3, r2
   beq 1f // very small; we don't care that we might make a denormal
   asrs ip, r3, #11
@@ -198,47 +200,45 @@ saving_func regular double2fix
 1:
   bfi r1, r3, #20, #11
   b double2int_entry
+2:
+  movs r0, #0
+saving_func_return
+
 
 double_section double2int
 saving_func regular double2int
 double2int_entry:
   lsls r2, r1, #1
   bcc double2int_z_entry // positive is ok for int64_z
-  orrs r3, r2, r0
-  beq double2int_z_entry // 0 or -0 is ok for int64_z
+  lsrs r3, r2, #21 // r3 = 11-bit exponent field (sign was already shifted out of r2)
+  beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
 
   lsrs r2, #21
   adds r2, #1
   subs r2, r2, #0x400
   bcc 1f // <1 means subtract 1
-  cmp r2, #52
-  bge double2int_z_entry // must be an integer
+  cmp r2, #31 // r2 = exponent field - 0x3ff at this point
+  bge double2int_z_entry // must be an integer or maxed out
   lsls r3, r1, #12
-  adds r3, r3, r0, lsr #20
-  // r3 now has highest 32 mantissa bits
-  lsls r3, r2
-  bne 1f // not integer as non zero fractional bits remain
-  lsls r3, r0, #12
+  adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
   lsls r3, r2
+  orrs r3, r3, r0, lsl #12 // low 20 mantissa bits: with r2 < 31 these bits are all guaranteed to be in the fraction
   beq double2int_z_entry // integer
 1:
   dcp_double2int_m r0,r0,r1
   subs r0, #1
 saving_func_return
 
 double_wrapper_section __aeabi_d2iz
-regular_func double2int_z
-saving_func wrapper __aeabi_d2iz
+saving_func wrapper __aeabi_d2iz double2int_z
 double2int_z_entry:
 @ with truncation towards 0
   dcp_double2int_m r0,r0,r1
   // note: this works with either saved or not saved call as it is just a `bx lr`
   saving_func_return
 
 double_wrapper_section __aeabi_d2uiz
-regular_func double2uint_z
-regular_func double2uint
-saving_func wrapper __aeabi_d2uiz
+saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
 double2uint_z_entry:
 @ with truncation towards 0
   dcp_double2uint_m r0,r0,r1
@@ -266,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
   saving_func_return
 
 double_wrapper_section __aeabi_dcmp
-
 saving_func wrapper __aeabi_cdrcmple
   dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
   bvs cmp_nan
 
@@ -578,35 +578,45 @@ regular_func double2uint64_z
 double_section double2fix64_z
 regular_func double2fix64_z
   lsls r3, r1, #1
-  bcc double2fix64 // positive is ok for fix64
-  push {r1, r2}
-  lsrs r3, #21
+  bcc double2fix64 // input positive is ok for fix64
+  mov ip, r2 // keep the incoming r2 in ip while r2 is used as scratch
+  asrs r2, r3, #21 // r2 = sign-extended 11-bit exponent field
+  beq 3f           // input zero or denormal, so just return zero
+  adds r2, #1 // becomes zero only when the exponent field was all ones
+  beq double2fix64 // input infinite/nan is ok for fix64; NOTE(review): r2 is 0 here, not the saved ip value - confirm double2fix64 does not need it for inf/nan
 
-  adds r3, r2
+  lsrs r3, #21 // r3 = exponent field, zero-extended
+  add r3, ip // add the saved r2 to the exponent
   movs r2, #1
   negs r2, r2
   lsrs r2, #22
-  subs r3, r2
+  subs r3, r2 // r3 = modified e - 0x3ff
 
-  bcc 1f // <1 means subtract 1
+  bcc 3f // modified input < 1.0 means result is zero; NOTE(review): unsigned test, a negative (e + r2) sum does not branch here - confirm
   cmp r3, #52
-  bge double2fix64 // must be an integer
-
-  lsls r2, r1, #12
-  lsrs r1, r0, #20
-  adds r2, r1
-  // r2 now has highest 32 mantissa bits
-  lsls r2, r3
-  bne 1f // not integer as non zero fractional bits remain
-  lsls r2, r0, #12
-  lsls r2, r3
-  bne 1f
+  bge 2f // modified input must be an integer or infinite
+
+  adds r3, #12 // shift count that also drops the sign and exponent bits of r1
+  mov r2, r1
+  lsls r2, r2, r3    // r2 has remaining fractional mantissa bits of r1
+  bne 1f             // not integer as non zero fractional bits remain
+  subs r3, #32 // shift count for the low word
+  asrs r2, r3, #31 // r2 = all ones if r3 is negative, else zero
+  bics r3, r3, r2 // map a negative shift count to zero
+  movs r2, r0
+  lsls r2, r2, r3 // r2 = fractional mantissa bits held in r0
+  bne 1f             // remaining fractional bits are non-zero, so argument was not an integer
+2:
   // integer
-  pop {r1, r2}
+  mov r2, ip // restore the incoming r2 before the tail call
   b double2fix64
+3: // result is zero
+  movs r0, #0
+  movs r1, #0
+  bx lr
 1:
-  pop {r1, r2}
   push {lr}
+  mov r2, ip // restore the incoming r2 before the call
   bl double2fix64
   movs r2, #0
   adds r0, #1
@@ -626,6 +636,44 @@ double_section double2fix
 regular_func double2fix
     shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
 
+double_section double2fix_z
+regular_func double2fix_z // adds r2 to the exponent of the double in r0:r1, then tail-calls double2int_z (r2 presumably the fractional bit count - confirm)
+  lsls r3, r1, #1 // drop the sign bit of the high word
+  asrs r3, #21 // r3 = sign-extended 11-bit exponent field
+  beq 2f // input is zero or denormal
+  adds r3, #1 // becomes zero only when the exponent field was all ones
+  beq 3f // input is infinite or nan
+
+  // extract exponent again
+  lsls r3, r1, #1
+  lsrs r3, #21 // this time zero-extended
+  // adjust
+  adds r3, r2
+  ble 2f // adjusted exponent <= 0: adjusted input is zero or denormal, so result is zero
+  lsrs r2, r3, #11
+  bne 3f // adjusted exponent no longer fits in 11 bits, so saturate
+
+  // put updated exponent back in double and convert to int
+  movs r2, #1
+  lsls r2, #11
+  subs r2, #1 // r2 = 0x7ff
+  lsls r2, #20 // r2 = 0x7ff00000, the exponent field mask
+  bics r1, r2 // clear the old exponent
+  lsls r3, #20
+  orrs r1, r3 // insert the adjusted exponent
+  b double2int_z
+2:
+  // result is zero
+  movs r0, #0
+  bx lr
+3:
+  movs r0, #0 // saturated result, selected by the sign bit of r1
+  subs r0, #1 // r0 = 0xffffffff
+  lsrs r0, #1 // r0 = 0x7fffffff
+  asrs r1, #31 // r1 = all ones if the input sign bit is set, else zero
+  eors r0, r1 // r0 = 0x80000000 for negative input, 0x7fffffff otherwise
+  bx lr
+
 double_section double2ufix
 regular_func double2ufix
 regular_func double2ufix_z
 
@@ -251,57 +251,65 @@ regular_func ufix2double
 
 double_section conv_dtoi64
 regular_func double2int64
-  lsls r2, r1, #1
-  bcc double2int64_z // positive is ok for int64_z
-  orrs r3, r2, r0
-  beq double2int64_z // 0 or -0 is ok for int64_z
-
-  lsrs r2, #21
-  adds r2, #1
-  subs r2, r2, #0x400
-  bcc 1f // <1 means subtract 1
-  cmp r2, #52
-  bge double2int64_z // must be an integer
-  lsls r3, r1, #12
-  adds r3, r3, r0, lsr #20
-  // r3 now has highest 32 mantissa bits
-  lsls r3, r2
-  bne 1f // not integer as non zero fractional bits remain
-  lsls r3, r0, #12
-  lsls r3, r2
-  beq double2int64_z // integer
+  lsls r3, r1, #1 // r3 = high word with the sign bit shifted out into carry
+  bcc double2int64_z // input positive is ok for int64_z
+  cmp r3, #0xffe00000 // 0xffe00000 = exponent field all ones
+  bcs double2int64_z // input is infinite or NaN
+  lsrs r3, #21 // r3 = exponent field
+  beq 2f // input zero or denormal, means answer remains zero
+  sub r3, #0x3ff // r3 = exponent with the bias removed
+  cmp r3, #0
+  blt 1f // negative input with magnitude less than 1.0
+  cmp r3, #52
+  bge double2int64_z // modified input must be an integer or infinite
+  adds r3, #12 // shift count that also drops the sign and exponent bits of r1
+  lsls r2, r1, r3    // r2 has remaining fractional mantissa bits of r1
+  bne 1f             // not integer as non zero fractional bits remain
+  subs r3, #32 // shift count for the low word
+  bics r3, r3, r3, asr #31 // map negative shift to zero
+  lsls r3, r0, r3 // r3 = fractional mantissa bits held in r0
+  beq double2int64_z   // remaining fractional bits are 0, so argument was an integer
 1:
   push {lr}
   bl double2int64_z
   subs r0, #1
   sbcs r1, r1, #0
   pop {pc}
+2:
+  movs r0, #0
+  movs r1, #0
+  bx lr
 
 double_section conv_dtofix64
 regular_func double2fix64
   lsls r3, r1, #1
-  bcc double2fix64_z // positive is ok for int64_z
+  bcc double2fix64_z // input positive is ok for fix64_z
+  cmp r3, #0xffe00000 // 0xffe00000 = exponent field all ones
+  bcs double2fix64_z // input is infinite or NaN
   lsrs r3, #21
-  adds r3, #1
-  rsb ip, r2, #0x400
-  subs r3, ip
-  bcc 1f // <1 means subtract 1
+  beq 2f // input zero or denormal, means answer remains zero
+  sub r3, #0x3ff // r3 = exponent with the bias removed
+  adds r3, r2 // add r2 to the exponent ("modified input" below)
+  blt 1f // modified input zero or denormal, or less than 1.0
   cmp r3, #52
-  bge double2fix64_z // must be an integer
-  lsls ip, r1, #12
-  adds ip, ip, r0, lsr #20
-  // ip now has highest 32 mantissa bits
-  lsls ip, r3
-  bne 1f // not integer as non zero fractional bits remain
-  lsls ip, r0, #12
-  lsls ip, r3
-  beq double2fix64_z // integer
+  bge double2fix64_z // modified input must be an integer or infinite
+  adds r3, #12 // shift count that also drops the sign and exponent bits of r1
+  lsls ip, r1, r3    // ip has remaining fractional mantissa bits of r1
+  bne 1f             // not integer as non zero fractional bits remain
+  subs r3, #32 // shift count for the low word
+  bics r3, r3, r3, asr #31 // map negative shift to zero
+  lsls r3, r0, r3 // r3 = fractional mantissa bits held in r0
+  beq double2fix64_z   // remaining fractional bits are 0, so argument was an integer
 1:
   push {lr}
   bl double2fix64_z
   subs r0, #1
   sbcs r1, r1, #0
   pop {pc}
+2:
+  movs r0, #0
+  movs r1, #0
+  bx lr
 
 double_wrapper_section conv_dtoi64_z