Optimized softfloat by precalculating signbit

ZERICO2005 · ZERICO2005 · commit 079f70b3f32e · 2025-04-06T08:40:43.000-06:00
diff --git a/src/crt/ddiv.src b/src/crt/ddiv.src
@@ -4,14 +4,23 @@
 
 	public	__ddiv
 
-; float64_t f64_div(float64_t, const float64_t*)
+; float64_t f64_div(bool, float64_t, const float64_t*)
 __ddiv:
 	push	af, iy
 	ld	iy, 9
-	add	iy, sp
+	add	iy, sp	; iy -> y, the stack operand: byte 7 is read below and iy itself is pushed as the pointer argument
+	
 	push	iy, bc, de, hl
+
+	ld	a, b	; bit 7 = signbit(x)
+	xor	a, (iy + 7)	; bit 7 = signbit(x) ^ signbit(y)
+	rla	; rotate bit 7 out into carry
+	push	af	; Carry = (signbit(x) != signbit(y)); pushed last, so it is the leading bool argument
+	
 	call	_f64_div
-	pop	af, af, af, af, iy, af
+	
+	pop	af, af, af, af, af	; discard the five argument slots pushed above (af, hl, de, bc, iy)
+	pop	iy, af	; restore the iy and af saved on entry
 	ret
 
 	extern	_f64_div
diff --git a/src/crt/dmul.src b/src/crt/dmul.src
@@ -4,14 +4,23 @@
 
 	public	__dmul
 
-; float64_t f64_mul(float64_t, const float64_t*)
+; float64_t f64_mul(bool, float64_t, const float64_t*)
 __dmul:
 	push	af, iy
 	ld	iy, 9
-	add	iy, sp
+	add	iy, sp	; iy -> y, the stack operand: byte 7 is read below and iy itself is pushed as the pointer argument
+	
 	push	iy, bc, de, hl
+	
+	ld	a, b	; bit 7 = signbit(x)
+	xor	a, (iy + 7)	; bit 7 = signbit(x) ^ signbit(y)
+	rla	; rotate bit 7 out into carry
+	push	af	; Carry = (signbit(x) != signbit(y)); pushed last, so it is the leading bool argument
+	
 	call	_f64_mul
-	pop	af, af, af, af, iy, af
+	
+	pop	af, af, af, af, af	; discard the five argument slots pushed above (af, hl, de, bc, iy)
+	pop	iy, af	; restore the iy and af saved on entry
 	ret
 
 	extern	_f64_mul
diff --git a/src/crt/drem.src b/src/crt/drem.src
@@ -4,14 +4,20 @@
 
 	public	__drem
 
-; float64_t f64_rem(float64_t, const float64_t*)
+; float64_t f64_rem(bool, float64_t, const float64_t*)
 __drem:
 	push	af, iy
 	ld	iy, 9
 	add	iy, sp
 	push	iy, bc, de, hl
+	
+	rl	b	; rotate bit 7 of b out into carry; bc has already been pushed above, so the pushed operand is unaffected
+	push	af	; Carry = signbit(x); pushed last, so it is the leading bool argument
+
 	call	_f64_rem
-	pop	af, af, af, af, iy, af
+
+	pop	af, af, af, af, af	; discard the five argument slots pushed above (af, hl, de, bc, iy)
+	pop	iy, af	; restore the iy and af saved on entry
 	ret
 
 	extern	_f64_rem
diff --git a/src/libc/fmal.src b/src/libc/fmal.src
@@ -4,8 +4,18 @@
 
 	public	_fmal, __debug_fmal
 
-_fmal := _softfloat_mulAddF64
-__debug_fmal := _softfloat_mulAddF64
+_fmal:
+__debug_fmal:
 	; flags handled by softfloat
+	ld	iy, 0
+	add	iy, sp	; iy = sp: the caller's argument area is patched in place before the tail jump
+	ld	a, (iy + 28)	; presumably the top (sign) byte of the third operand - TODO confirm against the argument layout
+	rlca	; bit 7 -> bit 0 (and carry); NOTE(review): bits 1-7 are not cleared
+	ld	(iy + 11), a	; signC, written to the byte after the first 8-byte operand
+	ld	a, (iy + 10)	; presumably the top byte of the first operand - TODO confirm
+	xor	a, (iy + 19)	; bit 7 = XOR with bit 7 of (iy + 19), presumably the top byte of the second operand
+	rlca	; bit 7 -> bit 0 (and carry)
+	ld	(iy + 20), a	; signZ, written to the byte after the second 8-byte operand
+	jq	_softfloat_mulAddF64	; jump, not call: nothing is pushed, so the callee sees the caller's (patched) stack
 
 	extern	_softfloat_mulAddF64
diff --git a/src/libc/fmodl.c b/src/libc/fmodl.c
diff --git a/src/libc/fmodl.src b/src/libc/fmodl.src
@@ -0,0 +1,31 @@
+	assume	adl = 1
+
+	section	.text
+
+	public	_fmodl
+
+_fmodl:
+	; fmodl(x, y): forwards to f64_rem(signbit(x), x, &y); x is read from ix + 6, y lives at ix + 15
+	call	__frameset0
+	pea	ix + 15	; &y
+
+	ld	hl, (ix + 12)
+	ld	a, h	; A = x[63:56]: take the sign byte now, before HL is reloaded with lower words
+	push	hl	; x[63:48]
+
+	ld	hl, (ix + 9)
+	push	hl	; x[47:24]
+
+	ld	hl, (ix + 6)
+	push	hl	; x[23:0]
+
+	rla	; bit 7 of A (bit 63 of x) -> carry
+	push	af	; Carry = signbit(x)
+
+	call	_f64_rem
+	ld	sp, ix
+	pop	ix
+	ret
+
+	extern	_f64_rem
+	extern	__frameset0
diff --git a/src/libc/sqrtl.src b/src/libc/sqrtl.src
@@ -3,7 +3,15 @@
 	section	.text
 
 	public	_sqrtl
-_sqrtl := _f64_sqrt
+
+_sqrtl:
 	; flags handled by softfloat
+	ld	hl, 10
+	add	hl, sp	; hl -> byte at sp + 10, presumably the top (sign) byte of the 8-byte operand - TODO confirm
+	ld	a, (hl)
+	rlca	; bit 7 -> bit 0 (and carry); NOTE(review): bits 1-7 are not cleared
+	inc	hl	; hl -> sp + 11, the byte right after the operand
+	ld	(hl), a	; store the sign there; f64_sqrt now takes an f64_param (ui followed by sign)
+	jq	_f64_sqrt	; jump, not call: f64_sqrt sees the caller's (patched) stack
 
 	extern	_f64_sqrt
diff --git a/src/softfloat/f64_div.c b/src/softfloat/f64_div.c
@@ -41,19 +41,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "specialize.h"
 #include "softfloat.h"
 
-float64_t f64_div( float64_t a, const float64_t *b )
+float64_t f64_div( bool signZ, float64_t a, float64_t *__restrict b )
 {
     union ui64_f64 uA;
     uint_fast64_t uiA;
-    bool signA;
+    // bool signA;
     int_fast16_t expA;
     uint_fast64_t sigA;
     union ui64_f64 uB;
     uint_fast64_t uiB;
-    bool signB;
+    // bool signB;
     int_fast16_t expB;
     uint_fast64_t sigB;
-    bool signZ;
+    // bool signZ;
     struct exp16_sig64 normExpSig;
     int_fast16_t expZ;
     uint32_t recip32, sig32Z, doubleTerm;
@@ -67,15 +67,15 @@ float64_t f64_div( float64_t a, const float64_t *b )
     *------------------------------------------------------------------------*/
     uA.f = a;
     uiA = uA.ui;
-    signA = signF64UI( uiA );
+    // signA = signF64UI( uiA );
     expA  = expF64UI( uiA );
     sigA  = fracF64UI( uiA );
     uB.f = *b;
     uiB = uB.ui;
-    signB = signF64UI( uiB );
+    // signB = signF64UI( uiB );
     expB  = expF64UI( uiB );
     sigB  = fracF64UI( uiB );
-    signZ = signA ^ signB;
+    // signZ = signA ^ signB;
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
     if ( expA == 0x7FF ) {
diff --git a/src/softfloat/f64_mul.c b/src/softfloat/f64_mul.c
@@ -41,19 +41,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "specialize.h"
 #include "softfloat.h"
 
-float64_t f64_mul( float64_t a, const float64_t *b )
+float64_t f64_mul( bool signZ, float64_t a, float64_t *__restrict b )
 {
     union ui64_f64 uA;
     uint_fast64_t uiA;
-    bool signA;
+    // bool signA;
     int_fast16_t expA;
     uint_fast64_t sigA;
     union ui64_f64 uB;
     uint_fast64_t uiB;
-    bool signB;
+    // bool signB;
     int_fast16_t expB;
     uint_fast64_t sigB;
-    bool signZ;
+    // bool signZ;
     uint_fast64_t magBits;
     struct exp16_sig64 normExpSig;
     int_fast16_t expZ;
@@ -69,15 +69,15 @@ float64_t f64_mul( float64_t a, const float64_t *b )
     *------------------------------------------------------------------------*/
     uA.f = a;
     uiA = uA.ui;
-    signA = signF64UI( uiA );
+    // signA = signF64UI( uiA );
     expA  = expF64UI( uiA );
     sigA  = fracF64UI( uiA );
     uB.f = *b;
     uiB = uB.ui;
-    signB = signF64UI( uiB );
+    // signB = signF64UI( uiB );
     expB  = expF64UI( uiB );
     sigB  = fracF64UI( uiB );
-    signZ = signA ^ signB;
+    // signZ = signA ^ signB;
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
     if ( expA == 0x7FF ) {
diff --git a/src/softfloat/f64_rem.c b/src/softfloat/f64_rem.c
@@ -41,11 +41,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "specialize.h"
 #include "softfloat.h"
 
-float64_t f64_rem( float64_t a, const float64_t *b )
+float64_t f64_rem( bool signA, float64_t a, float64_t *__restrict b )
 {
     union ui64_f64 uA;
     uint_fast64_t uiA;
-    bool signA;
+    // bool signA;
     int_fast16_t expA;
     uint_fast64_t sigA;
     union ui64_f64 uB;
@@ -66,7 +66,7 @@ float64_t f64_rem( float64_t a, const float64_t *b )
     *------------------------------------------------------------------------*/
     uA.f = a;
     uiA = uA.ui;
-    signA = signF64UI( uiA );
+    // signA = signF64UI( uiA );
     expA  = expF64UI( uiA );
     sigA  = fracF64UI( uiA );
     uB.f = *b;
diff --git a/src/softfloat/f64_sqrt.c b/src/softfloat/f64_sqrt.c
@@ -41,11 +41,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "specialize.h"
 #include "softfloat.h"
 
-float64_t f64_sqrt( float64_t a )
+float64_t f64_sqrt( f64_param A )
 {
     union ui64_f64 uA;
     uint_fast64_t uiA;
-    bool signA;
+    bool signA = A.sign;
     int_fast16_t expA;
     uint_fast64_t sigA, uiZ;
     struct exp16_sig64 normExpSig;
@@ -55,12 +55,13 @@ float64_t f64_sqrt( float64_t a )
     uint32_t q;
     uint_fast64_t sigZ, shiftedSigZ;
     union ui64_f64 uZ;
+    uint_fast64_t a = A.ui;
 
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
     uA.f = a;
     uiA = uA.ui;
-    signA = signF64UI( uiA );
+    // signA = signF64UI( uiA );
     expA  = expF64UI( uiA );
     sigA  = fracF64UI( uiA );
     /*------------------------------------------------------------------------
diff --git a/src/softfloat/include/internals.h b/src/softfloat/include/internals.h
@@ -153,13 +153,23 @@ float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
 float64_t softfloat_addMagsF64( uint_fast64_t, const uint_fast64_t*, bool );
 float64_t softfloat_subMagsF64( uint_fast64_t, const uint_fast64_t*, bool );
 #endif
-#if 0
-float64_t
- softfloat_mulAddF64(
-     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
-#else
-float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_t );
-#endif
+
+typedef struct f64_param { // by-value operand for f64_sqrt: value bits plus a sign supplied by the caller
+    uint_fast64_t ui; // operand value as an integer; f64_sqrt reads it via A.ui
+    bool sign; // sign precomputed by the caller; f64_sqrt initialises signA from it
+} f64_param;
+
+// #if 0
+// float64_t
+//  softfloat_mulAddF64(
+//      uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
+// #elif 1
+// struct input_mulAddF64;
+// float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_t);
+// #else
+// struct input_mulAddF64;
+// float64_t softfloat_mulAddF64( bool, bool, struct input_mulAddF64 * );
+// #endif
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
diff --git a/src/softfloat/include/softfloat.h b/src/softfloat/include/softfloat.h
@@ -252,11 +252,17 @@ float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
 float64_t f64_add( float64_t, float64_t );
 float64_t f64_sub( float64_t, float64_t );
 #endif
+#if 0
 float64_t f64_mul( float64_t, const float64_t* );
-float64_t f64_mulAdd( float64_t, float64_t, float64_t );
 float64_t f64_div( float64_t, const float64_t* );
 float64_t f64_rem( float64_t, const float64_t* );
-float64_t f64_sqrt( float64_t );
+#else
+float64_t f64_mul( bool, float64_t, float64_t *__restrict );
+float64_t f64_div( bool, float64_t, float64_t *__restrict );
+float64_t f64_rem( bool, float64_t, float64_t *__restrict );
+#endif
+float64_t f64_mulAdd( float64_t, float64_t, float64_t );
+// float64_t f64_sqrt( float64_t );
 bool f64_eq( float64_t, float64_t );
 bool f64_le( float64_t, float64_t );
 bool f64_lt( float64_t, float64_t );
diff --git a/src/softfloat/s_mulAddF64.c b/src/softfloat/s_mulAddF64.c