Skip to content

Commit 3095079

Browse files
committed
Add fp8 Functions
1 parent 8b4fffe commit 3095079

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+4337
-0
lines changed
Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
2+
#include <assert.h>
3+
#include <stdbool.h>
4+
#include <stdint.h>
5+
#include "platform.h"
6+
#include "internals.h"
7+
#include "specialize.h"
8+
#include "softfloat.h"
9+
/*********************************************************************************/
10+
/********************************support functions********************************/
11+
/*********************************************************************************/
12+
/*
 * Return the `len`-bit field of `val` whose least significant bit sits at
 * bit position `pos`, right-aligned in the result.
 *
 * Preconditions (checked with assert): pos >= 0, len > 0 and
 * pos + len <= 64.
 */
static inline uint64_t extract64(uint64_t val, int pos, int len)
{
    assert(pos >= 0 && len > 0 && len <= 64 - pos);

    /* len is in 1..64 here, so the shift count 64 - len stays in 0..63. */
    const uint64_t low_ones = ~UINT64_C(0) >> (64 - len);
    const uint64_t aligned = val >> pos;

    return aligned & low_ones;
}
17+
18+
/*
 * Build a 64-bit mask of `len` consecutive one bits whose lowest set bit is
 * at position `pos`. Bits shifted past bit 63 are discarded.
 *
 * Preconditions (checked with assert): 0 <= pos < 64 and 1 <= len <= 64.
 */
static inline uint64_t make_mask64(int pos, int len)
{
    assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);

    /* Start with `len` ones in the low bits, then slide them up to `pos`. */
    const uint64_t run_of_ones = UINT64_MAX >> (64 - len);

    return run_of_ones << pos;
}
23+
24+
/*********************************************************************************/
25+
26+
/*
 * Table-driven reciprocal-square-root estimate on a raw floating-point
 * encoding.
 *
 * val  raw bits: significand in bits [0, s), exponent in bits [s, s+e),
 *      sign in bit s+e.
 * e    exponent field width in bits.
 * s    significand (fraction) field width in bits.
 * sub  true when `val` is a subnormal; the value is normalised before the
 *      lookup. The caller must guarantee a non-zero significand in that
 *      case, otherwise the normalisation loop never terminates.
 *
 * Returns the estimate in the same bit layout as `val`.
 *
 * The lookup uses p = 7 bits: the exponent's least significant bit plus the
 * top p-1 significand bits select one of 128 entries, each holding a 7-bit
 * output significand. When the format has fewer than p-1 (index) or p
 * (output) significand bits -- as with the 2- and 3-bit fp8 significands
 * this file passes in -- the significand is left-aligned into the index and
 * the table value is truncated to its top `s` bits. The previous code
 * shifted by the negative amounts (s - p + 1) and (s - p), which is
 * undefined behaviour in C.
 *
 * NOTE(review): the table appears to be the RISC-V vector vfrsqrt7
 * estimate table -- confirm against the specification.
 */
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
    uint64_t exp = extract64(val, s, e);
    uint64_t sig = extract64(val, 0, s);
    uint64_t sign = extract64(val, s + e, 1);
    const int p = 7;

    static const uint8_t table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53};

    if (sub) {
        /*
         * Normalise: shift the significand up until its top bit is set,
         * decrementing the exponent once per shift (exp is unsigned and
         * wraps below zero; the ~exp arithmetic below relies on that).
         */
        while (extract64(sig, s - 1, 1) == 0) {
            exp--;
            sig <<= 1;
        }

        /* Drop the now-implicit leading one. */
        sig = (sig << 1) & make_mask64(0, s);
    }

    /* Align the significand so that it occupies exactly p-1 index bits. */
    const int idx_shift = s - (p - 1);
    const uint64_t sig_idx = (idx_shift >= 0) ? (sig >> idx_shift)
                                              : (sig << -idx_shift);
    int idx = (int)(((exp & 1) << (p - 1)) | sig_idx);

    /* Place the 7-bit table value at the top of the s-bit significand. */
    const int out_shift = s - p;
    uint64_t out_sig = (out_shift >= 0)
                           ? ((uint64_t)(table[idx]) << out_shift)
                           : ((uint64_t)(table[idx]) >> -out_shift);

    /* make_mask64(0, e - 1) is a run of e-1 one bits; with wrapping */
    /* arithmetic this is (3 * mask - 1 - exp) / 2.                   */
    uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;

    return (sign << (s+e)) | (out_exp << s) | out_sig;
}
63+
64+
/*********************************************************************************/
65+
/*
 * Reciprocal-square-root estimate for the float8_1_t format, which is
 * handed to rsqrte7() as a 4-bit-exponent / 3-bit-significand layout.
 *
 * Dispatches on the one-hot class mask returned by f8_1_classify():
 *   -inf, -normal, -subnormal, sNaN -> default NaN, raises invalid
 *   qNaN                            -> default NaN
 *   -0 / +0                         -> 0xf8 / 0x78, raises
 *                                      softfloat_flag_infinite
 *   +inf                            -> 0x0
 *   +subnormal / anything else      -> rsqrte7() estimate
 */
float8_1_t f8_1_rsqrte7( float8_1_t in)
{
    union ui8_f8_1 bits;

    bits.f = in;
    const unsigned int cls = f8_1_classify(in);

    if (cls == 0x001 || cls == 0x002 || cls == 0x004 || cls == 0x100) {
        /* -inf, -normal, -subnormal, sNaN */
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNF8_1UI;
    } else if (cls == 0x200) {
        /* qNaN: same result, no flag */
        bits.ui = defaultNaNF8_1UI;
    } else if (cls == 0x008) {
        /* -0 */
        bits.ui = 0xf8;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        /* +0 */
        bits.ui = 0x78;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x080) {
        /* +inf */
        bits.ui = 0x0;
    } else {
        /* +subnormal (0x020) needs normalising inside rsqrte7; every */
        /* remaining class is treated as a positive normal number.    */
        const bool is_subnormal = (cls == 0x020);
        bits.ui = rsqrte7(bits.ui, 4, 3, is_subnormal);
    }

    return bits.f;
}
101+
102+
/*********************************************************************************/
103+
/*
 * Reciprocal-square-root estimate for the float8_2_t format, which is
 * handed to rsqrte7() as a 5-bit-exponent / 2-bit-significand layout.
 *
 * Dispatches on the class mask returned by f8_2_classify():
 *   -inf, -normal, -subnormal, sNaN -> default NaN, raises invalid
 *   qNaN                            -> default NaN
 *   -0 / +0                         -> 0xfc / 0x7c, raises
 *                                      softfloat_flag_infinite
 *   +inf                            -> 0x0
 *   +subnormal / anything else      -> rsqrte7() estimate
 */
float8_2_t f8_2_rsqrte7( float8_2_t in)
{
    union ui8_f8_2 bits;

    bits.f = in;
    const unsigned int cls = f8_2_classify(in);

    if (cls == 0x001 || cls == 0x002 || cls == 0x004 || cls == 0x100) {
        /* -inf, -normal, -subnormal, sNaN */
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNF8_2UI;
    } else if (cls == 0x200) {
        /* qNaN: same result, no flag */
        bits.ui = defaultNaNF8_2UI;
    } else if (cls == 0x008) {
        /* -0 */
        bits.ui = 0xfc;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        /* +0 */
        bits.ui = 0x7c;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x080) {
        /* +inf */
        bits.ui = 0x0;
    } else {
        /* +subnormal (0x020) needs normalising inside rsqrte7; every */
        /* remaining class is treated as a positive normal number.    */
        const bool is_subnormal = (cls == 0x020);
        bits.ui = rsqrte7(bits.ui, 5, 2, is_subnormal);
    }

    return bits.f;
}
139+
140+
/*********************************************************************************/
141+
/*
 * Table-driven reciprocal estimate on a raw floating-point encoding.
 *
 * val             raw bits: significand in bits [0, s), exponent in bits
 *                 [s, s+e), sign in bit s+e.
 * e               exponent field width in bits.
 * s               significand (fraction) field width in bits.
 * rm              rounding mode; only consulted when the result overflows.
 *                 Values 1, 2 and 3 are tested below -- presumably
 *                 softfloat_round_minMag / _min / _max; confirm against
 *                 softfloat.h.
 * sub             true when `val` is a subnormal; the caller must guarantee
 *                 a non-zero significand in that case, otherwise the
 *                 normalisation loop never terminates.
 * round_abnormal  out-parameter, set to true when a subnormal input is too
 *                 small for its reciprocal to be represented; never
 *                 cleared here, so the caller must initialise it.
 *
 * Returns the estimate in the same bit layout as `val`.
 *
 * The lookup uses p = 7 significand bits in and out. When the format has
 * fewer than p significand bits -- as with the 2- and 3-bit fp8
 * significands this file passes in -- the significand is left-aligned into
 * the index and the table value is truncated to its top `s` bits. The
 * previous code shifted by the negative amount (s - p), which is undefined
 * behaviour in C.
 *
 * NOTE(review): the table appears to be the RISC-V vector vfrec7 estimate
 * table -- confirm against the specification.
 */
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
                              bool *round_abnormal)
{
    uint64_t exp = extract64(val, s, e);
    uint64_t sig = extract64(val, 0, s);
    uint64_t sign = extract64(val, s + e, 1);
    const int p = 7;

    static const uint8_t table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0};

    if (sub) {
        /*
         * Normalise: shift the significand up until its top bit is set,
         * decrementing the exponent once per shift (exp is unsigned and
         * wraps below zero).
         */
        while (extract64(sig, s - 1, 1) == 0) {
            exp--;
            sig <<= 1;
        }

        /* Drop the now-implicit leading one. */
        sig = (sig << 1) & make_mask64(0, s);

        /*
         * A normalised exponent other than 0 or -1 means the input had
         * more than one leading zero; the result is reported as overflow.
         */
        if (exp != 0 && exp != UINT64_MAX) {
            *round_abnormal = true;
            if (rm == 1 ||
                (rm == 2 && !sign) ||
                (rm == 3 && sign)) {
                /* All-ones exponent with zero significand, minus one: */
                /* the largest-magnitude finite encoding of that sign.  */
                return ((sign << (s+e)) | make_mask64(s, e)) - 1;
            } else {
                /* All-ones exponent, zero significand (infinity). */
                return (sign << (s+e)) | make_mask64(s, e);
            }
        }
    }

    /* Align the significand to p index bits and the table value to s bits. */
    const int shift = s - p;
    int idx = (int)((shift >= 0) ? (sig >> shift) : (sig << -shift));
    uint64_t out_sig = (shift >= 0) ? ((uint64_t)(table[idx]) << shift)
                                    : ((uint64_t)(table[idx]) >> -shift);

    /* With wrapping arithmetic this is 2 * mask - 1 - exp. */
    uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /* Subnormal result: make the leading one explicit ... */
        out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
        if (out_exp == UINT64_MAX) {
            /* ... and shift once more when the exponent went to -1. */
            out_sig >>= 1;
            out_exp = 0;
        }
    }

    return (sign << (s+e)) | (out_exp << s) | out_sig;
}
197+
198+
/*********************************************************************************/
199+
200+
201+
/*
 * Reciprocal estimate for the float8_1_t format, which is handed to
 * recip7() as a 4-bit-exponent / 3-bit-significand layout.
 *
 * Dispatches on the class mask returned by f8_1_classify():
 *   -inf / +inf     -> 0x80 / 0x0
 *   -0 / +0         -> 0xf8 / 0x78, raises softfloat_flag_infinite
 *   sNaN            -> default NaN, raises invalid
 *   qNaN            -> default NaN
 *   everything else -> recip7() estimate using softfloat_roundingMode;
 *                      inexact and overflow are raised when recip7()
 *                      reports an abnormal rounding.
 */
float8_1_t f8_1_recip7( float8_1_t in)
{
    union ui8_f8_1 bits;
    bool overflowed = false;

    bits.f = in;
    const unsigned int cls = f8_1_classify(in);

    switch (cls) {
    case 0x001: /* -inf */
        bits.ui = 0x80;
        break;
    case 0x080: /* +inf */
        bits.ui = 0x0;
        break;
    case 0x008: /* -0 */
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        bits.ui = 0xf8;
        break;
    case 0x010: /* +0 */
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        bits.ui = 0x78;
        break;
    case 0x100: /* sNaN */
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNF8_1UI;
        break;
    case 0x200: /* qNaN */
        bits.ui = defaultNaNF8_1UI;
        break;
    default: {
        /* 0x004 / 0x020 are the negative / positive subnormal classes; */
        /* any other remaining class is handled as a normal number.     */
        const bool is_subnormal = (cls == 0x004 || cls == 0x020);

        bits.ui = recip7(bits.ui, 4, 3, softfloat_roundingMode,
                         is_subnormal, &overflowed);
        if (overflowed) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
        break;
    }
    }

    return bits.f;
}
243+
244+
/*********************************************************************************/
245+
246+
/*
 * Reciprocal estimate for the float8_2_t format, which is handed to
 * recip7() as a 5-bit-exponent / 2-bit-significand layout.
 *
 * Dispatches on the class mask returned by f8_2_classify():
 *   -inf / +inf     -> 0x80 / 0x0
 *   -0 / +0         -> 0xfc / 0x7c, raises softfloat_flag_infinite
 *   sNaN            -> default NaN, raises invalid
 *   qNaN            -> default NaN
 *   everything else -> recip7() estimate using softfloat_roundingMode;
 *                      inexact and overflow are raised when recip7()
 *                      reports an abnormal rounding.
 */
float8_2_t f8_2_recip7( float8_2_t in)
{
    union ui8_f8_2 bits;
    bool overflowed = false;

    bits.f = in;
    const unsigned int cls = f8_2_classify(in);

    switch (cls) {
    case 0x001: /* -inf */
        bits.ui = 0x80;
        break;
    case 0x080: /* +inf */
        bits.ui = 0x0;
        break;
    case 0x008: /* -0 */
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        bits.ui = 0xfc;
        break;
    case 0x010: /* +0 */
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        bits.ui = 0x7c;
        break;
    case 0x100: /* sNaN */
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNF8_2UI;
        break;
    case 0x200: /* qNaN */
        bits.ui = defaultNaNF8_2UI;
        break;
    default: {
        /* 0x004 / 0x020 are the negative / positive subnormal classes; */
        /* any other remaining class is handled as a normal number.     */
        const bool is_subnormal = (cls == 0x004 || cls == 0x020);

        bits.ui = recip7(bits.ui, 5, 2, softfloat_roundingMode,
                         is_subnormal, &overflowed);
        if (overflowed) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
        break;
    }
    }

    return bits.f;
}

softfloat_8/f8_1_add.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
2+
#include <stdbool.h>
3+
#include <stdint.h>
4+
#include "platform.h"
5+
#include "internals.h"
6+
#include "softfloat.h"
7+
8+
/*
 * Add two float8_1_t values.
 *
 * The operands' raw bits are extracted through the ui8_f8_1 union. When
 * their sign bits differ the work is delegated to softfloat_subMagsF8_1,
 * otherwise to softfloat_addMagsF8_1. With INLINE_LEVEL >= 1 the helper is
 * called directly; otherwise it is called through a function pointer.
 */
float8_1_t f8_1_add( float8_1_t a, float8_1_t b )
{
    union ui8_f8_1 unionA;
    union ui8_f8_1 unionB;

    unionA.f = a;
    unionB.f = b;

    const uint_fast8_t bitsA = unionA.ui;
    const uint_fast8_t bitsB = unionB.ui;

    /* The sign of the XOR is set exactly when the two sign bits differ. */
    const bool signsDiffer = signF8_1UI( bitsA ^ bitsB );

#if defined INLINE_LEVEL && (1 <= INLINE_LEVEL)
    return signsDiffer ? softfloat_subMagsF8_1( bitsA, bitsB )
                       : softfloat_addMagsF8_1( bitsA, bitsB );
#else
    float8_1_t (*magsFuncPtr)( uint_fast8_t, uint_fast8_t ) =
        signsDiffer ? softfloat_subMagsF8_1 : softfloat_addMagsF8_1;

    return (*magsFuncPtr)( bitsA, bitsB );
#endif
}
35+

softfloat_8/f8_1_classify.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
2+
#include <stdbool.h>
3+
#include <stdint.h>
4+
#include "platform.h"
5+
#include "internals.h"
6+
#include "specialize.h"
7+
#include "softfloat.h"
8+
9+
10+
/*
 * Classify a float8_1_t value and return a bit mask:
 *
 *   bit 0  negative infinity        bit 5  positive subnormal
 *   bit 1  negative normal          bit 6  positive normal
 *   bit 2  negative subnormal       bit 7  positive infinity
 *   bit 3  negative zero            bit 8  signaling NaN
 *   bit 4  positive zero            bit 9  quiet NaN
 *
 * Each bit is computed independently from the sign, exponent and fraction
 * fields and from the NaN predicates.
 */
uint_fast16_t f8_1_classify( float8_1_t a )
{
    union ui8_f8_1 uA;

    uA.f = a;
    const uint_fast8_t bits = uA.ui;

    const bool expAllOnes = ( expF8_1UI( bits ) == 0x0F );
    const bool expZero    = ( expF8_1UI( bits ) == 0 );
    const bool negative   = signF8_1UI( bits );
    const bool fracZero   = ( fracF8_1UI( bits ) == 0 );
    const bool isNaN      = isNaNF8_1UI( bits );
    const bool isSNaN     = softfloat_isSigNaNF8_1UI( bits );

    /* Magnitude categories, independent of sign. */
    const bool magInf       = expAllOnes && fracZero;
    const bool magNormal    = !expAllOnes && !expZero;
    const bool magSubnormal = expZero && !fracZero;
    const bool magZero      = expZero && fracZero;

    uint_fast16_t mask = 0;

    if ( negative ) {
        if ( magInf )       mask |= 0x001;
        if ( magNormal )    mask |= 0x002;
        if ( magSubnormal ) mask |= 0x004;
        if ( magZero )      mask |= 0x008;
    } else {
        if ( magZero )      mask |= 0x010;
        if ( magSubnormal ) mask |= 0x020;
        if ( magNormal )    mask |= 0x040;
        if ( magInf )       mask |= 0x080;
    }

    if ( isNaN ) {
        mask |= isSNaN ? 0x100 : 0x200;
    }

    return mask;
}
37+

0 commit comments

Comments
 (0)