Skip to content

Commit 7fa1332

Browse files
committed
Move F16 table to a better place
1 parent 00d8694 commit 7fa1332

File tree

1 file changed

+39
-39
lines changed

1 file changed

+39
-39
lines changed

base/float.jl

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,45 @@ function Float32(x::Int128)
137137
reinterpret(Float32, s | d + y)
138138
end
139139

140+
# Float32 -> Float16 algorithm from:
141+
# "Fast Half Float Conversion" by Jeroen van der Zijp
142+
# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
143+
144+
# Build the two 512-entry lookup tables used by the table-driven
# Float32 -> Float16 conversion. For each biased Float32 exponent `biased`
# (0:255), slot `biased + 1` holds the entry for a clear sign bit and slot
# `(biased | 0x100) + 1` holds the entry for a set sign bit.
let bases = Vector{UInt16}(undef, 512),
    shifts = Vector{UInt8}(undef, 512)

    for biased = 0:255
        unbiased = biased - 127
        if unbiased < -24
            # Too small even for a Float16 subnormal: becomes (signed) zero.
            base = 0x0000
            shift = 24
        elseif unbiased < -14
            # Representable only as a Float16 subnormal.
            base = 0x0400 >> (-unbiased - 14)
            shift = -unbiased - 1
        elseif unbiased <= 15
            # Normal Float16 range: re-bias the exponent, drop 13 mantissa bits.
            base = (unbiased + 15) << 10
            shift = 13
        elseif unbiased < 128
            # Finite but too large for Float16: becomes (signed) infinity.
            base = 0x7C00
            shift = 24
        else
            # Float32 Inf/NaN exponent: keep the all-ones Float16 exponent.
            base = 0x7C00
            shift = 13
        end

        pos = (biased | 0x000) + 1  # slot for sign bit clear
        neg = (biased | 0x100) + 1  # slot for sign bit set
        bases[pos] = base
        bases[neg] = base | 0x8000
        shifts[pos] = shift
        shifts[neg] = shift
    end

    # Freeze the scratch vectors into immutable tuples bound to global constants.
    global const shifttable = (shifts...,)
    global const basetable = (bases...,)
end
178+
140179
function Float16(val::Float32)
141180
f = reinterpret(UInt32, val)
142181
if isnan(val)
@@ -202,45 +241,6 @@ function Float32(val::Float16)
202241
return reinterpret(Float32, ret)
203242
end
204243

205-
# Float32 -> Float16 algorithm from:
206-
# "Fast Half Float Conversion" by Jeroen van der Zijp
207-
# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
208-
209-
# Build the two 512-entry lookup tables used by the table-driven
# Float32 -> Float16 conversion. For each biased Float32 exponent `biased`
# (0:255), slot `biased + 1` holds the entry for a clear sign bit and slot
# `(biased | 0x100) + 1` holds the entry for a set sign bit.
let bases = Vector{UInt16}(undef, 512),
    shifts = Vector{UInt8}(undef, 512)

    for biased = 0:255
        unbiased = biased - 127
        if unbiased < -24
            # Too small even for a Float16 subnormal: becomes (signed) zero.
            base = 0x0000
            shift = 24
        elseif unbiased < -14
            # Representable only as a Float16 subnormal.
            base = 0x0400 >> (-unbiased - 14)
            shift = -unbiased - 1
        elseif unbiased <= 15
            # Normal Float16 range: re-bias the exponent, drop 13 mantissa bits.
            base = (unbiased + 15) << 10
            shift = 13
        elseif unbiased < 128
            # Finite but too large for Float16: becomes (signed) infinity.
            base = 0x7C00
            shift = 24
        else
            # Float32 Inf/NaN exponent: keep the all-ones Float16 exponent.
            base = 0x7C00
            shift = 13
        end

        pos = (biased | 0x000) + 1  # slot for sign bit clear
        neg = (biased | 0x100) + 1  # slot for sign bit set
        bases[pos] = base
        bases[neg] = base | 0x8000
        shifts[pos] = shift
        shifts[neg] = shift
    end

    # Freeze the scratch vectors into immutable tuples bound to global constants.
    global const shifttable = (shifts...,)
    global const basetable = (bases...,)
end
243-
244244
#convert(::Type{Float16}, x::Float32) = fptrunc(Float16, x)
245245
# Convert a Float64 to Float32 by calling `fptrunc` with the target type.
function Float32(x::Float64)
    return fptrunc(Float32, x)
end
246246
# Convert a Float64 to Float16 in two steps: first to Float32, then from
# that Float32 to Float16.
function Float16(x::Float64)
    single = Float32(x)
    return Float16(single)
end

0 commit comments

Comments
 (0)