Implement f8_to_f16 and f16_to_f8 conversion functions

rbuchner-aril · rbuchner-aril · commit 422234b4cf7b · 2025-02-06T16:21:20.000-08:00
diff --git a/softfloat/f16_to_f8.c b/softfloat/f16_to_f8.c
@@ -0,0 +1,89 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the 16-bit floating-point value `a' to the 8-bit floating-point
+| format.  NaN and infinity inputs are mapped directly; every other nonzero
+| value is rounded and packed by `softfloat_roundPackToF8'.
+*----------------------------------------------------------------------------*/
+float8_t f16_to_f8( float16_t a )
+{
+  union ui16_f16 uA;
+  uint_fast16_t uiA;
+  bool sign;
+  int_fast8_t exp;
+  uint_fast16_t frac;
+  struct commonNaN commonNaN;
+  uint_fast8_t uiZ;
+  uint_fast16_t frac8;
+  union ui8_f8 uZ;
+
+  /* Unpack the sign, exponent, and fraction fields of the input. */
+  uA.f = a;
+  uiA = uA.ui;
+  sign = signF16UI( uiA );
+  exp  = expF16UI( uiA );
+  frac = fracF16UI( uiA );
+  /* 0x1F is the 16-bit NaN/infinity exponent (f8_to_f16 packs it for Inf). */
+  if ( exp == 0x1F ) {
+    if ( frac ) {
+      softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+      uiZ = softfloat_commonNaNToF8UI( &commonNaN );
+    } else {
+      uiZ = signInfF8UI( sign );
+    }
+    goto uiZ;
+  }
+  /* Drop the two low fraction bits, folding them into a sticky bit. */
+  frac8 = frac>>2 | ((frac & 0x3) != 0);
+  if ( ! (exp | frac8) ) {
+    uiZ = packToF8UI( 0, 0, 0 ); /* zero of either sign: all fields zero */
+    goto uiZ;
+  }
+  /* Round and pack: exponent offset by 0xC, bit 8 (0x100) set above frac8. */
+  return softfloat_roundPackToF8( sign, exp - 0xC, frac8 | 0x100 );
+ uiZ:
+  uZ.ui = uiZ;
+  return uZ.f;
+}
+
diff --git a/softfloat/f8_to_f16.c b/softfloat/f8_to_f16.c
@@ -0,0 +1,91 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the 8-bit floating-point value `a' to the 16-bit floating-point
+| format.  NaN inputs give `defaultNaNF16UI', infinities keep their sign, and
+| a zero input gives the all-zero bit pattern.
+*----------------------------------------------------------------------------*/
+float16_t f8_to_f16( float8_t a )
+{
+    union ui8_f8 in;
+    union ui16_f16 out;
+    uint_fast8_t bits;
+    bool negative;
+    int_fast8_t expField;
+    uint_fast8_t fracBits;
+    uint_fast16_t packed;
+    struct exp8_sig8 norm;
+
+    /* Split the raw encoding into its three fields. */
+    in.f = a;
+    bits = in.ui;
+    negative = signF8UI( bits );
+    expField = expF8UI( bits );
+    fracBits = fracF8UI( bits );
+
+    if ( isNaNF8UI( bits ) ) {
+        /* Every NaN input collapses to the default 16-bit NaN. */
+        packed = defaultNaNF16UI;
+    } else if ( isInfF8UI( bits ) ) {
+        /* Signed infinity: exponent 0x1F with a zero fraction field. */
+        packed = packToF16UI( negative, 0x1F, 0 );
+    } else if ( ! (expField || fracBits) ) {
+        /* Zero exponent and fraction: the result has all fields zero. */
+        packed = packToF16UI( 0, 0, 0 );
+    } else {
+        if ( ! expField ) {
+            /* Zero exponent, nonzero fraction: normalize it first. */
+            norm = softfloat_normSubnormalF8Sig( fracBits );
+            expField = norm.exp - 1;
+            fracBits = norm.sig;
+        }
+        /* Offset the exponent by 0xB and shift the fraction left 6 bits. */
+        packed =
+            packToF16UI( negative, expField + 0xB, (uint_fast16_t) fracBits<<6 );
+    }
+
+    out.ui = packed;
+    return out.f;
+}
+
diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h
@@ -140,6 +140,11 @@ float128_t i64_to_f128( int64_t );
 void i64_to_extF80M( int64_t, extFloat80_t * );
 void i64_to_f128M( int64_t, float128_t * );
 
+/*----------------------------------------------------------------------------
+| 8-bit floating-point operations.
+*----------------------------------------------------------------------------*/
+float16_t f8_to_f16( float8_t );
+
 /*----------------------------------------------------------------------------
 | 16-bit (half-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
@@ -155,6 +160,7 @@ uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
 uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
 int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
 int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
+float8_t f16_to_f8( float16_t );
 float32_t f16_to_f32( float16_t );
 float64_t f16_to_f64( float16_t );
 #ifdef SOFTFLOAT_FAST_INT64
diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in
@@ -52,7 +52,9 @@ softfloat_c_srcs = \
 	f16_roundToInt.c \
 	f16_sqrt.c \
 	f16_sub.c \
+	f16_to_f8.c \
 	f16_to_f128.c \
+	f8_to_f16.c \
 	f16_to_f32.c \
 	f16_to_f64.c \
 	f16_to_i8.c \
diff --git a/softfloat/specialize.h b/softfloat/specialize.h
@@ -103,6 +103,12 @@ struct commonNaN { char _unused; };
 *----------------------------------------------------------------------------*/
 #define defaultNaNBF16UI 0x7FC0
 
+/*----------------------------------------------------------------------------
+| Yields the bit pattern of an 8-bit floating-point NaN as an unsigned integer.
+| The result is always `defaultNaNF8UI'; the macro does not examine `aPtr'.
+*----------------------------------------------------------------------------*/
+#define softfloat_commonNaNToF8UI( aPtr ) ((uint_fast8_t) defaultNaNF8UI)
+
 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
 | 16-bit floating-point signaling NaN.