|
| 1 | + |
| 2 | +#include <assert.h> |
| 3 | +#include <stdbool.h> |
| 4 | +#include <stdint.h> |
| 5 | +#include "platform.h" |
| 6 | +#include "internals.h" |
| 7 | +#include "specialize.h" |
| 8 | +#include "softfloat.h" |
| 9 | +/*********************************************************************************/ |
| 10 | +/********************************support functions********************************/ |
| 11 | +/*********************************************************************************/ |
/*
 * Return the len-bit field of val whose least significant bit sits at
 * bit position pos, right-aligned in the result.
 *
 * Requires pos >= 0, len > 0 and pos + len <= 64 (checked with assert).
 */
static inline uint64_t extract64(uint64_t val, int pos, int len)
{
    assert(pos >= 0 && len > 0 && len <= 64 - pos);

    /* len low-order one bits; len is in [1, 64] so the shift is in range. */
    const uint64_t field_mask = UINT64_MAX >> (64 - len);
    const uint64_t shifted = val >> pos;

    return shifted & field_mask;
}
| 17 | + |
/*
 * Build a 64-bit mask of len consecutive one bits whose lowest set bit
 * is at position pos.  Bits that would land above bit 63 are discarded.
 *
 * Requires 0 <= pos < 64 and 0 < len <= 64 (checked with assert).
 */
static inline uint64_t make_mask64(int pos, int len)
{
    assert(pos >= 0 && pos < 64 && len > 0 && len <= 64);

    /* Start with len ones at the bottom, then slide them up to pos. */
    const uint64_t low_ones = ~UINT64_C(0) >> (64 - len);

    return low_ones << pos;
}
| 23 | + |
| 24 | +/*********************************************************************************/ |
| 25 | + |
/*
 * Table-driven reciprocal-square-root estimate on a raw floating-point
 * encoding, using a 7-bit lookup (p = 7).
 *
 * val  raw encoding: significand in bits [0, s), exponent in bits
 *      [s, s + e), sign in bit s + e
 * e    width of the exponent field in bits
 * s    width of the significand field in bits
 * sub  true when val is a subnormal; the significand must then be
 *      non-zero, otherwise the normalisation loop never terminates
 *
 * Returns the raw encoding of the estimate, with the sign bit of val
 * copied through.
 *
 * The table index is the exponent LSB followed by the p-1 leading
 * significand bits, and the 7-bit table entry is placed at the top of
 * the output significand.  For formats with s < p the shifts have to run
 * the other way: the index is zero-padded on the right and the entry is
 * truncated to its top s bits.  Shifting by the (negative) differences
 * directly, as was done before, is undefined behaviour in C.
 */
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
    uint64_t exp = extract64(val, s, e);
    uint64_t sig = extract64(val, 0, s);
    uint64_t sign = extract64(val, s + e, 1);
    const int p = 7;

    static const uint8_t table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53};

    if (sub) {
        /*
         * Normalise: shift the leading one up to the top significand bit,
         * decrementing the exponent each step.  exp is unsigned, so going
         * below zero wraps; the ~exp term below undoes that.
         */
        while (extract64(sig, s - 1, 1) == 0) {
            exp--;
            sig <<= 1;
        }

        /* Drop the now-implicit leading one. */
        sig = (sig << 1) & make_mask64(0, s);
    }

    /* Leading p-1 significand bits, zero-padded when s < p-1. */
    const int idx_shift = s - (p - 1);
    const uint64_t lead = idx_shift >= 0 ? sig >> idx_shift
                                         : sig << -idx_shift;
    int idx = (int)(((exp & 1) << (p - 1)) | lead);

    /* Align the 7-bit entry with the top of the s-bit significand. */
    const int out_shift = s - p;
    const uint64_t entry = table[idx];
    uint64_t out_sig = out_shift >= 0 ? entry << out_shift
                                      : entry >> -out_shift;

    /* (3 * bias - 1 - exp) / 2, evaluated modulo 2^64. */
    uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;

    return (sign << (s + e)) | (out_exp << s) | out_sig;
}
| 63 | + |
| 64 | +/*********************************************************************************/ |
| 65 | +float8_1_t f8_1_rsqrte7( float8_1_t in) |
| 66 | +{ |
| 67 | + union ui8_f8_1 uA; |
| 68 | + |
| 69 | + uA.f = in; |
| 70 | + unsigned int ret = f8_1_classify(in); |
| 71 | + bool sub = false; |
| 72 | + switch(ret) { |
| 73 | + case 0x001: // -inf |
| 74 | + case 0x002: // -normal |
| 75 | + case 0x004: // -subnormal |
| 76 | + case 0x100: // sNaN |
| 77 | + softfloat_exceptionFlags |= softfloat_flag_invalid; |
| 78 | + case 0x200: //qNaN |
| 79 | + uA.ui = defaultNaNF8_1UI; |
| 80 | + break; |
| 81 | + case 0x008: // -0 |
| 82 | + uA.ui = 0xf8; |
| 83 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 84 | + break; |
| 85 | + case 0x010: // +0 |
| 86 | + uA.ui = 0x78; |
| 87 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 88 | + break; |
| 89 | + case 0x080: //+inf |
| 90 | + uA.ui = 0x0; |
| 91 | + break; |
| 92 | + case 0x020: //+ sub |
| 93 | + sub = true; |
| 94 | + default: // +num |
| 95 | + uA.ui = rsqrte7(uA.ui, 4, 3, sub); |
| 96 | + break; |
| 97 | + } |
| 98 | + |
| 99 | + return uA.f; |
| 100 | +} |
| 101 | + |
| 102 | +/*********************************************************************************/ |
| 103 | +float8_2_t f8_2_rsqrte7( float8_2_t in) |
| 104 | +{ |
| 105 | + union ui8_f8_2 uA; |
| 106 | + |
| 107 | + uA.f = in; |
| 108 | + unsigned int ret = f8_2_classify(in); |
| 109 | + bool sub = false; |
| 110 | + switch(ret) { |
| 111 | + case 0x001: // -inf |
| 112 | + case 0x002: // -normal |
| 113 | + case 0x004: // -subnormal |
| 114 | + case 0x100: // sNaN |
| 115 | + softfloat_exceptionFlags |= softfloat_flag_invalid; |
| 116 | + case 0x200: //qNaN |
| 117 | + uA.ui = defaultNaNF8_2UI; |
| 118 | + break; |
| 119 | + case 0x008: // -0 |
| 120 | + uA.ui = 0xfc; |
| 121 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 122 | + break; |
| 123 | + case 0x010: // +0 |
| 124 | + uA.ui = 0x7c; |
| 125 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 126 | + break; |
| 127 | + case 0x080: //+inf |
| 128 | + uA.ui = 0x0; |
| 129 | + break; |
| 130 | + case 0x020: //+ sub |
| 131 | + sub = true; |
| 132 | + default: // +num |
| 133 | + uA.ui = rsqrte7(uA.ui, 5, 2, sub); |
| 134 | + break; |
| 135 | + } |
| 136 | + |
| 137 | + return uA.f; |
| 138 | +} |
| 139 | + |
| 140 | +/*********************************************************************************/ |
/*
 * Table-driven reciprocal estimate on a raw floating-point encoding,
 * using a 7-bit lookup (p = 7).
 *
 * val             raw encoding: significand in bits [0, s), exponent in
 *                 bits [s, s + e), sign in bit s + e
 * e               width of the exponent field in bits
 * s               width of the significand field in bits
 * rm              rounding mode, consulted only on the overflow path;
 *                 the values 1, 2 and 3 are tested here
 *                 NOTE(review): these look like SoftFloat's
 *                 round_minMag / round_min / round_max -- confirm
 *                 against softfloat.h
 * sub             true when val is a subnormal; the significand must
 *                 then be non-zero, otherwise the normalisation loop
 *                 never terminates
 * round_abnormal  set to true when the overflow path is taken; never
 *                 written otherwise, so the caller must initialise it
 *
 * Returns the raw encoding of the estimate, with the sign bit of val
 * copied through.
 *
 * The table index is the p leading significand bits and the 7-bit table
 * entry is placed at the top of the output significand.  For formats
 * with s < p the shifts have to run the other way: the index is
 * zero-padded on the right and the entry is truncated to its top s bits.
 * Shifting by the (negative) difference s - p directly, as was done
 * before, is undefined behaviour in C.
 */
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
                              bool *round_abnormal)
{
    uint64_t exp = extract64(val, s, e);
    uint64_t sig = extract64(val, 0, s);
    uint64_t sign = extract64(val, s + e, 1);
    const int p = 7;

    static const uint8_t table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0};

    if (sub) {
        /*
         * Normalise: shift the leading one up to the top significand bit,
         * decrementing the exponent each step.  exp is unsigned, so going
         * below zero wraps around.
         */
        while (extract64(sig, s - 1, 1) == 0) {
            exp--;
            sig <<= 1;
        }

        /* Drop the now-implicit leading one. */
        sig = (sig << 1) & make_mask64(0, s);

        /*
         * An exponent other than 0 or -1 (wrapped) after normalisation is
         * treated as overflow: report it and return either the all-ones
         * exponent pattern or the encoding just below it, depending on
         * the rounding mode and sign.
         */
        if (exp != 0 && exp != UINT64_MAX) {
            *round_abnormal = true;
            if (rm == 1 ||
                (rm == 2 && !sign) ||
                (rm == 3 && sign))
                return ((sign << (s + e)) | make_mask64(s, e)) - 1;
            else
                return (sign << (s + e)) | make_mask64(s, e);
        }
    }

    /*
     * shift >= 0: take the top p significand bits / place the entry at the
     * top of the field.  shift < 0: pad the index with zeros / truncate
     * the entry to s bits.
     */
    const int shift = s - p;
    int idx = (int)(shift >= 0 ? sig >> shift : sig << -shift);
    const uint64_t entry = table[idx];
    uint64_t out_sig = shift >= 0 ? entry << shift : entry >> -shift;

    /* 2 * bias - 1 - exp, evaluated modulo 2^64. */
    uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /* Exponent 0 or -1: shift the significand right and make the
         * leading one explicit. */
        out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
        if (out_exp == UINT64_MAX) {
            out_sig >>= 1;
            out_exp = 0;
        }
    }

    return (sign << (s + e)) | (out_exp << s) | out_sig;
}
| 197 | + |
| 198 | +/*********************************************************************************/ |
| 199 | + |
| 200 | + |
| 201 | +float8_1_t f8_1_recip7( float8_1_t in) |
| 202 | +{ |
| 203 | + union ui8_f8_1 uA; |
| 204 | + |
| 205 | + uA.f = in; |
| 206 | + unsigned int ret = f8_1_classify(in); |
| 207 | + bool sub = false; |
| 208 | + bool round_abnormal = false; |
| 209 | + switch(ret) { |
| 210 | + case 0x001: // -inf |
| 211 | + uA.ui = 0x80; |
| 212 | + break; |
| 213 | + case 0x080: //+inf |
| 214 | + uA.ui = 0x0; |
| 215 | + break; |
| 216 | + case 0x008: // -0 |
| 217 | + uA.ui = 0xf8; |
| 218 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 219 | + break; |
| 220 | + case 0x010: // +0 |
| 221 | + uA.ui = 0x78; |
| 222 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 223 | + break; |
| 224 | + case 0x100: // sNaN |
| 225 | + softfloat_exceptionFlags |= softfloat_flag_invalid; |
| 226 | + case 0x200: //qNaN |
| 227 | + uA.ui = defaultNaNF8_1UI; |
| 228 | + break; |
| 229 | + case 0x004: // -subnormal |
| 230 | + case 0x020: //+ sub |
| 231 | + sub = true; |
| 232 | + default: // +- normal |
| 233 | + uA.ui = recip7(uA.ui, 4, 3, |
| 234 | + softfloat_roundingMode, sub, &round_abnormal); |
| 235 | + if (round_abnormal) |
| 236 | + softfloat_exceptionFlags |= softfloat_flag_inexact | |
| 237 | + softfloat_flag_overflow; |
| 238 | + break; |
| 239 | + } |
| 240 | + |
| 241 | + return uA.f; |
| 242 | +} |
| 243 | + |
| 244 | +/*********************************************************************************/ |
| 245 | + |
| 246 | +float8_2_t f8_2_recip7( float8_2_t in) |
| 247 | +{ |
| 248 | + union ui8_f8_2 uA; |
| 249 | + |
| 250 | + uA.f = in; |
| 251 | + unsigned int ret = f8_2_classify(in); |
| 252 | + bool sub = false; |
| 253 | + bool round_abnormal = false; |
| 254 | + switch(ret) { |
| 255 | + case 0x001: // -inf |
| 256 | + uA.ui = 0x80; |
| 257 | + break; |
| 258 | + case 0x080: //+inf |
| 259 | + uA.ui = 0x0; |
| 260 | + break; |
| 261 | + case 0x008: // -0 |
| 262 | + uA.ui = 0xfc; |
| 263 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 264 | + break; |
| 265 | + case 0x010: // +0 |
| 266 | + uA.ui = 0x7c; |
| 267 | + softfloat_exceptionFlags |= softfloat_flag_infinite; |
| 268 | + break; |
| 269 | + case 0x100: // sNaN |
| 270 | + softfloat_exceptionFlags |= softfloat_flag_invalid; |
| 271 | + case 0x200: //qNaN |
| 272 | + uA.ui = defaultNaNF8_2UI; |
| 273 | + break; |
| 274 | + case 0x004: // -subnormal |
| 275 | + case 0x020: //+ sub |
| 276 | + sub = true; |
| 277 | + default: // +- normal |
| 278 | + uA.ui = recip7(uA.ui, 5, 2, |
| 279 | + softfloat_roundingMode, sub, &round_abnormal); |
| 280 | + if (round_abnormal) |
| 281 | + softfloat_exceptionFlags |= softfloat_flag_inexact | |
| 282 | + softfloat_flag_overflow; |
| 283 | + break; |
| 284 | + } |
| 285 | + |
| 286 | + return uA.f; |
| 287 | +} |
0 commit comments