-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfloat16.go
More file actions
100 lines (91 loc) · 2 KB
/
float16.go
File metadata and controls
100 lines (91 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package main
import "math"
// float32ToFloat16 encodes every element of src as an IEEE 754-2008 binary16
// bit pattern and writes it to the same index of dst. dst needs at least
// len(src) elements; a shorter dst causes an index-out-of-range panic once
// the loop reaches the first missing slot.
func float32ToFloat16(dst []uint16, src []float32) {
	for idx := 0; idx < len(src); idx++ {
		dst[idx] = float32ToFloat16Bits(src[idx])
	}
}
// float16ToFloat32 decodes every IEEE 754-2008 binary16 bit pattern in src
// into a float32 and writes it to the same index of dst. dst needs at least
// len(src) elements; a shorter dst causes an index-out-of-range panic once
// the loop reaches the first missing slot.
func float16ToFloat32(dst []float32, src []uint16) {
	for idx := 0; idx < len(src); idx++ {
		dst[idx] = float16BitsToFloat32(src[idx])
	}
}
// float32ToFloat16Bits converts a single float32 to its IEEE 754-2008 binary16
// bit pattern, rounding to nearest with ties going to the even result.
//
// Special cases:
//   - ±Inf maps to ±Inf. NaN stays NaN and keeps the top 10 bits of its
//     payload; a payload that would truncate to zero is replaced by 1 so the
//     result is not mistaken for an infinity.
//   - Magnitudes too large for binary16, including values that round up past
//     65504, become ±Inf.
//   - Magnitudes of at most 2^-25, ±0 and all float32 subnormals become ±0.
//   - Magnitudes below 2^-14 that do not round to zero are encoded as
//     binary16 subnormals.
func float32ToFloat16Bits(f float32) uint16 {
	bits := math.Float32bits(f)
	sign := uint16((bits >> 16) & 0x8000)
	exp := int((bits >> 23) & 0xff)
	mant := bits & 0x7fffff

	if exp == 0xff {
		if mant == 0 {
			return sign | 0x7c00
		}
		// Preserve NaN payloads where possible, but never emit a zero
		// mantissa: that bit pattern would mean infinity.
		payload := uint16(mant >> 13)
		if payload == 0 {
			payload = 1
		}
		return sign | 0x7c00 | payload
	}

	// Re-bias the exponent from float32 (127) to binary16 (15).
	expHalf := exp - 127 + 15
	if expHalf >= 0x1f {
		return sign | 0x7c00
	}

	if expHalf <= 0 {
		// Below 2^-25 everything rounds to zero. This also covers float32
		// zeros and subnormals, whose exponent field is 0.
		if expHalf < -10 {
			return sign
		}
		// Subnormal result: make the implicit leading 1 explicit and shift
		// the 24-bit significand down to units of 2^-24. A round-up to 0x400
		// lands on the smallest normal number, which is the correct encoding.
		significand := mant | 0x800000
		return sign | uint16(roundShiftHalfEven(significand, uint(14-expHalf)))
	}

	// Normal result: round exponent and mantissa as one integer so that a
	// mantissa carry bumps the exponent, and a carry out of exponent 30
	// yields 0x7c00 (infinity).
	packed := uint32(expHalf)<<23 | mant
	return sign | uint16(roundShiftHalfEven(packed, 13))
}

// roundShiftHalfEven returns v >> shift rounded to nearest, with exact ties
// resolved towards the even result. shift must be in the range [1, 31].
func roundShiftHalfEven(v uint32, shift uint) uint32 {
	quotient := v >> shift
	remainder := v & (uint32(1)<<shift - 1)
	halfway := uint32(1) << (shift - 1)
	if remainder > halfway || (remainder == halfway && quotient&1 != 0) {
		quotient++
	}
	return quotient
}
// float16BitsToFloat32 decodes one IEEE 754-2008 binary16 bit pattern into
// the float32 with the same value. Signed zeros, subnormals, infinities and
// NaNs are all handled; for a NaN the 10 payload bits are placed at the top
// of the float32 mantissa and the lowest mantissa bit is additionally set.
func float16BitsToFloat32(h uint16) float32 {
	signBit := uint32(h&0x8000) << 16
	expField := int(h>>10) & 0x1f
	frac := uint32(h) & 0x3ff

	// All-ones exponent: infinity (frac == 0) or NaN (frac != 0).
	if expField == 0x1f {
		out := signBit | 0x7f800000 | frac<<13
		if frac != 0 {
			out |= 1
		}
		return math.Float32frombits(out)
	}

	// Normal number: only the exponent bias changes (15 -> 127).
	if expField != 0 {
		return math.Float32frombits(signBit | uint32(expField-15+127)<<23 | frac<<13)
	}

	// Zero exponent and zero fraction: signed zero.
	if frac == 0 {
		return math.Float32frombits(signBit)
	}

	// Subnormal: every binary16 subnormal is a normal float32, so shift the
	// fraction left until its leading 1 reaches the implicit-bit position,
	// lowering the exponent by one for each step.
	biased := 127 - 14
	for ; frac&0x400 == 0; biased-- {
		frac <<= 1
	}
	frac &= 0x3ff // drop the now-implicit leading 1
	return math.Float32frombits(signBit | uint32(biased)<<23 | frac<<13)
}