Skip to content

Commit 303eaff

Browse files
authored
add CUDA support, batched transform, change real/complex transform type
2 parents 9ffc35a + 9e53c23 commit 303eaff

File tree

11 files changed

+461
-108
lines changed

11 files changed

+461
-108
lines changed

Project.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ version = "0.2.0"
66
[deps]
77
SHTns_jll = "daf09cc5-9ab3-509e-9618-0b89086eb825"
88

9+
[weakdeps]
10+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
11+
12+
[extensions]
13+
SHTnsCUDAExt = "CUDA"
14+
915
[compat]
1016
julia = "1.6"
1117

ext/SHTnsCUDAExt/SHTnsCUDAExt.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Package extension providing `CuArray` methods for the SHTns transforms.
# It extends `synth`, `synth!`, `analys` and `analys!` from SHTns and wraps the
# `cu_*` entry points of the SHTns shared library (`libshtns`).
module SHTnsCUDAExt

using CUDA
using SHTns

import SHTns: libshtns
import SHTns: synth, synth!, analys, analys!

# Refuse to initialise without a usable GPU stack. An explicit check is used
# instead of `@assert` so that the test cannot be elided and the user gets an
# actionable message.
function __init__()
    CUDA.functional() || error(
        "SHTnsCUDAExt requires a functional CUDA installation " *
        "(CUDA.functional() returned false)",
    )
    return nothing
end

include("sht.jl")
include("synth.jl")
include("analys.jl")

end #module

ext/SHTnsCUDAExt/analys.jl

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
    analys(cfg::SHTnsCfg, v::CuArray{Float64})

Allocate a `CuVector{ComplexF64}` of length `cfg.nlm * cfg.howmany`, fill it by
calling `analys!` on a copy of `v`, and return it. The caller's `v` is never
passed to the in-place routine.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.
"""
function analys(cfg::SHTnsCfg, v::CuArray{Float64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm * cfg.howmany
    qlm = CuVector{ComplexF64}(undef, ncoef)
    spatial = copy(v)
    analys!(cfg, spatial, qlm)
    return qlm
end
8+
9+
"""
    analys(cfg::SHTnsCfg, v::CuArray{ComplexF64})

Allocate a `CuVector{ComplexF64}` of length `cfg.nlm_cplx * cfg.howmany`, call
`analys!` on a copy of `v`, and return the vector.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.

NOTE(review): this file defines no `analys!` method for `CuArray{ComplexF64}`
input (the complex variants are commented out), so the call relies on a method
defined elsewhere — confirm that one exists.
"""
function analys(cfg::SHTnsCfg, v::CuArray{ComplexF64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm_cplx * cfg.howmany
    qlm = CuVector{ComplexF64}(undef, ncoef)
    spatial = copy(v)
    analys!(cfg, spatial, qlm)
    return qlm
end
16+
17+
"""
    analys(cfg::SHTnsCfg, utheta::CuArray{Float64}, uphi::CuArray{Float64})

Allocate two `CuVector{ComplexF64}` of length `cfg.nlm * cfg.howmany`, fill
them by calling `analys!` on copies of `utheta` and `uphi`, and return them as
`(slm, tlm)`.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.
"""
function analys(cfg::SHTnsCfg, utheta::CuArray{Float64}, uphi::CuArray{Float64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm * cfg.howmany
    slm = CuVector{ComplexF64}(undef, ncoef)
    tlm = similar(slm)
    # Copies keep the caller's arrays out of the in-place routine.
    vt = copy(utheta)
    vp = copy(uphi)
    analys!(cfg, vt, vp, slm, tlm)
    return slm, tlm
end
25+
26+
"""
    analys(cfg::SHTnsCfg, utheta::CuArray{ComplexF64}, uphi::CuArray{ComplexF64})

Allocate two `CuVector{ComplexF64}` of length `cfg.nlm_cplx * cfg.howmany`,
call `analys!` on copies of `utheta` and `uphi`, and return `(slm, tlm)`.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.

NOTE(review): this file defines no `analys!` method for `CuArray{ComplexF64}`
input (the complex variants are commented out), so the call relies on a method
defined elsewhere — confirm that one exists.
"""
function analys(cfg::SHTnsCfg, utheta::CuArray{ComplexF64}, uphi::CuArray{ComplexF64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm_cplx * cfg.howmany
    slm = CuVector{ComplexF64}(undef, ncoef)
    tlm = similar(slm)
    vt = copy(utheta)
    vp = copy(uphi)
    analys!(cfg, vt, vp, slm, tlm)
    return slm, tlm
end
34+
35+
"""
    analys(cfg::SHTnsCfg, ur::CuArray{Float64}, utheta::CuArray{Float64}, uphi::CuArray{Float64})

Allocate three `CuVector{ComplexF64}` of length `cfg.nlm * cfg.howmany`, fill
them by calling `analys!` on copies of the three spatial arrays, and return
them as `(qlm, slm, tlm)`.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.
"""
function analys(cfg::SHTnsCfg, ur::CuArray{Float64}, utheta::CuArray{Float64}, uphi::CuArray{Float64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm * cfg.howmany
    qlm = CuVector{ComplexF64}(undef, ncoef)
    slm = similar(qlm)
    tlm = similar(qlm)
    # Copies keep the caller's arrays out of the in-place routine.
    vr = copy(ur)
    vt = copy(utheta)
    vp = copy(uphi)
    analys!(cfg, vr, vt, vp, qlm, slm, tlm)
    return qlm, slm, tlm
end
44+
45+
"""
    analys(cfg::SHTnsCfg, ur::CuArray{ComplexF64}, utheta::CuArray{ComplexF64}, uphi::CuArray{ComplexF64})

Allocate three `CuVector{ComplexF64}` of length `cfg.nlm_cplx * cfg.howmany`,
call `analys!` on copies of the three spatial arrays, and return
`(qlm, slm, tlm)`.

Asserts that `cfg.shtype.gpu` is set and that `cfg.nlat` is nonzero.

NOTE(review): this file defines no `analys!` method for `CuArray{ComplexF64}`
input (the complex variants are commented out), so the call relies on a method
defined elsewhere — confirm that one exists.
"""
function analys(cfg::SHTnsCfg, ur::CuArray{ComplexF64}, utheta::CuArray{ComplexF64}, uphi::CuArray{ComplexF64})
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    ncoef = cfg.nlm_cplx * cfg.howmany
    qlm = CuVector{ComplexF64}(undef, ncoef)
    slm = similar(qlm)
    tlm = similar(qlm)
    vr = copy(ur)
    vt = copy(utheta)
    vp = copy(uphi)
    analys!(cfg, vr, vt, vp, qlm, slm, tlm)
    return qlm, slm, tlm
end
54+
55+
"""
    analys!(cfg::SHTnsCfg, v::CuArray{Float64}, qlm::CuVector{ComplexF64})

In-place scalar analysis on the GPU: pass `v` and `qlm` to `cu_spat_to_SH`
together with `cfg.cfg` and `cfg.lmax`, then return `qlm`.

Returning the output vector (rather than the `nothing` produced by the
underlying call) matches the `synth!` methods, which return their outputs.

Asserts that `cfg.shtype.gpu` is set.

NOTE(review): the allocating `analys` wrappers pass a copy of the spatial data,
presumably because the backend may overwrite `v` — confirm.
"""
function analys!(cfg::SHTnsCfg, v::CuArray{Float64}, qlm::CuVector{ComplexF64})
    @assert cfg.shtype.gpu
    cu_spat_to_SH(cfg.cfg, v, qlm, cfg.lmax)
    return qlm
end
59+
60+
61+
"""
    analys!(cfg::SHTnsCfg, utheta, uphi, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64})

In-place two-component analysis on the GPU: pass `utheta`, `uphi`, `slm` and
`tlm` to `cu_spat_to_SHsphtor` together with `cfg.cfg` and `cfg.lmax`, then
return `(slm, tlm)`.

`utheta` and `uphi` must share the same concrete `CuArray{Float64}` type.
Returning the output vectors (rather than the `nothing` produced by the
underlying call) matches the `synth!` methods.

Asserts that `cfg.shtype.gpu` is set.
"""
function analys!(cfg::SHTnsCfg, utheta::A, uphi::A, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {A<:CuArray{Float64}}
    @assert cfg.shtype.gpu
    cu_spat_to_SHsphtor(cfg.cfg, utheta, uphi, slm, tlm, cfg.lmax)
    return slm, tlm
end
65+
66+
"""
    analys!(cfg::SHTnsCfg, ur, utheta, uphi, qlm, slm, tlm)

In-place three-component analysis on the GPU: pass the three spatial arrays and
the three `CuVector{ComplexF64}` spectral vectors to `cu_spat_to_SHqst`
together with `cfg.cfg` and `cfg.lmax`, then return `(qlm, slm, tlm)`.

`ur`, `utheta` and `uphi` must share the same concrete `CuArray{Float64}` type.
Returning the output vectors (rather than the `nothing` produced by the
underlying call) matches the `synth!` methods.

Asserts that `cfg.shtype.gpu` is set.
"""
function analys!(cfg::SHTnsCfg, ur::A, utheta::A, uphi::A, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {A<:CuArray{Float64}}
    @assert cfg.shtype.gpu
    cu_spat_to_SHqst(cfg.cfg, ur, utheta, uphi, qlm, slm, tlm, cfg.lmax)
    return qlm, slm, tlm
end
70+
71+
# Complex-to-complex transforms are not available for CUDA (status: SHTns v3.7).
72+
73+
# function analys!(cfg::SHTnsCfg, v::CuArray{ComplexF64}, qlm::CuVector{ComplexF64})
74+
# return cu_spat_cplx_to_SH(cfg.cfg, v, qlm)
75+
# end
76+
77+
# function analys!(cfg::SHTnsCfg, utheta::T, uphi::T, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {T<:CuArray{ComplexF64}}
78+
# return cu_spat_cplx_to_SHsphtor(cfg.cfg, utheta, uphi, slm, tlm)
79+
# end
80+
81+
# function analys!(cfg::SHTnsCfg, ur::T, utheta::T, uphi::T, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {T<:CuArray{ComplexF64}}
82+
# return cu_spat_cplx_to_SHqst(cfg.cfg, ur, utheta, uphi, qlm, slm, tlm)
83+
# end

ext/SHTnsCUDAExt/sht.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
    cu_spat_to_SH(cfg, Vr, Qlm, lmax)

Call the `cu_spat_to_SH` symbol of `libshtns`, passing `Vr` and `Qlm` as device
pointers and `lmax` as a `Clong`. Returns `nothing`.

NOTE(review): `lmax` is passed as `Clong`; confirm this matches the integer
type in the C prototype.
"""
function cu_spat_to_SH(cfg, Vr::CuMatrix{Float64}, Qlm::CuVector{ComplexF64}, lmax)
    return ccall(
        (:cu_spat_to_SH, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{Float64}, CuPtr{ComplexF64}, Clong),
        cfg, Vr, Qlm, lmax
    )
end
4+
5+
"""
    cu_SH_to_spat(cfg, Qlm, Vr, lmax)

Call the `cu_SH_to_spat` symbol of `libshtns`, passing `Qlm` and `Vr` as device
pointers and `lmax` as a `Clong`. Returns `nothing`.

NOTE(review): `lmax` is passed as `Clong`; confirm this matches the integer
type in the C prototype.
"""
function cu_SH_to_spat(cfg, Qlm::CuVector{ComplexF64}, Vr::CuMatrix{Float64}, lmax)
    return ccall(
        (:cu_SH_to_spat, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{ComplexF64}, CuPtr{Float64}, Clong),
        cfg, Qlm, Vr, lmax
    )
end
8+
9+
"""
    cu_spat_to_SHsphtor(cfg, Vt, Vp, Slm, Tlm, lmax)

Call the `cu_spat_to_SHsphtor` symbol of `libshtns`, passing the two spatial
matrices and the two spectral vectors as device pointers and `lmax` as a
`Clong`. Returns `nothing`.
"""
function cu_spat_to_SHsphtor(cfg, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, lmax)
    return ccall(
        (:cu_spat_to_SHsphtor, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{Float64}, CuPtr{Float64}, CuPtr{ComplexF64}, CuPtr{ComplexF64}, Clong),
        cfg, Vt, Vp, Slm, Tlm, lmax
    )
end
12+
13+
"""
    cu_SHsphtor_to_spat(cfg, Slm, Tlm, Vt, Vp, lmax)

Call the `cu_SHsphtor_to_spat` symbol of `libshtns`, passing the two spectral
vectors and the two spatial matrices as device pointers and `lmax` as a
`Clong`. Returns `nothing`.
"""
function cu_SHsphtor_to_spat(cfg, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, lmax)
    return ccall(
        (:cu_SHsphtor_to_spat, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{ComplexF64}, CuPtr{ComplexF64}, CuPtr{Float64}, CuPtr{Float64}, Clong),
        cfg, Slm, Tlm, Vt, Vp, lmax
    )
end
16+
17+
"""
    cu_spat_to_SHqst(cfg, Vr, Vt, Vp, Qlm, Slm, Tlm, lmax)

Call the `cu_spat_to_SHqst` symbol of `libshtns`, passing the three spatial
matrices and the three spectral vectors as device pointers and `lmax` as a
`Clong`. Returns `nothing`.

The wrapper carries the `cu_` prefix like the other CUDA wrappers in this file;
the three-component `analys!` method calls it under that name.
"""
function cu_spat_to_SHqst(cfg, Vr::CuMatrix{Float64}, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, Qlm::CuVector{ComplexF64}, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, lmax)
    return ccall(
        (:cu_spat_to_SHqst, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{Float64}, CuPtr{Float64}, CuPtr{Float64}, CuPtr{ComplexF64}, CuPtr{ComplexF64}, CuPtr{ComplexF64}, Clong),
        cfg, Vr, Vt, Vp, Qlm, Slm, Tlm, lmax
    )
end

# Un-prefixed name kept for backward compatibility; forwards to the `cu_` wrapper.
function spat_to_SHqst(cfg, Vr::CuMatrix{Float64}, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, Qlm::CuVector{ComplexF64}, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, lmax)
    return cu_spat_to_SHqst(cfg, Vr, Vt, Vp, Qlm, Slm, Tlm, lmax)
end
20+
21+
"""
    cu_SHqst_to_spat(cfg, Qlm, Slm, Tlm, Vr, Vt, Vp, lmax)

Call the `cu_SHqst_to_spat` symbol of `libshtns`, passing the three spectral
vectors and the three spatial matrices as device pointers and `lmax` as a
`Clong`. Returns `nothing`.

The wrapper carries the `cu_` prefix like the other CUDA wrappers in this file;
the three-component `synth!` method calls it under that name.
"""
function cu_SHqst_to_spat(cfg, Qlm::CuVector{ComplexF64}, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, Vr::CuMatrix{Float64}, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, lmax)
    return ccall(
        (:cu_SHqst_to_spat, libshtns[]),
        Cvoid,
        (shtns_cfg, CuPtr{ComplexF64}, CuPtr{ComplexF64}, CuPtr{ComplexF64}, CuPtr{Float64}, CuPtr{Float64}, CuPtr{Float64}, Clong),
        cfg, Qlm, Slm, Tlm, Vr, Vt, Vp, lmax
    )
end

# Un-prefixed name kept for backward compatibility; forwards to the `cu_` wrapper.
function SHqst_to_spat(cfg, Qlm::CuVector{ComplexF64}, Slm::CuVector{ComplexF64}, Tlm::CuVector{ComplexF64}, Vr::CuMatrix{Float64}, Vt::CuMatrix{Float64}, Vp::CuMatrix{Float64}, lmax)
    return cu_SHqst_to_spat(cfg, Qlm, Slm, Tlm, Vr, Vt, Vp, lmax)
end
24+
25+

ext/SHTnsCUDAExt/synth.jl

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
"""
    synth(cfg::SHTnsCfg, qlm::CuVector{ComplexF64})

Allocate a spatial `CuMatrix` and fill it from `qlm` via `synth!`, returning
the matrix. The element type is `Float64` when the configuration's first type
parameter is `Real` and `ComplexF64` otherwise. The matrix is
`cfg.nphi × cfg.nlat_padded` when `cfg.shtype.contiguous_phi` is set and
`cfg.nlat_padded × cfg.nphi` otherwise.

Asserts that `cfg.shtype.gpu` is set, that `cfg.nlat` is nonzero, and that
`length(qlm) == nlm(cfg) * cfg.howmany`.

NOTE(review): the output is sized for a single field regardless of
`cfg.howmany` — confirm the expected layout for batched transforms.
"""
function synth(cfg::SHTnsCfg{TR,N,S}, qlm::CuVector{ComplexF64}) where {TR,N,S}
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    @assert length(qlm) == nlm(cfg)*cfg.howmany

    Tv = TR == Real ? Float64 : ComplexF64
    nx, ny = if cfg.shtype.contiguous_phi
        (cfg.nphi, cfg.nlat_padded)
    else
        (cfg.nlat_padded, cfg.nphi)
    end

    v = CuMatrix{Tv}(undef, nx, ny)
    synth!(cfg, qlm, v)
    return v
end
14+
15+
"""
    synth(cfg::SHTnsCfg, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64})

Allocate two spatial `CuMatrix` buffers and fill them from `slm` and `tlm` via
`synth!`, returning `(utheta, uphi)`. The element type is `Float64` when the
configuration's first type parameter is `Real` and `ComplexF64` otherwise. Each
matrix is `cfg.nphi × cfg.nlat_padded` when `cfg.shtype.contiguous_phi` is set
and `cfg.nlat_padded × cfg.nphi` otherwise.

Asserts that `cfg.shtype.gpu` is set, that `cfg.nlat` is nonzero, and that both
inputs have length `nlm(cfg) * cfg.howmany`.

NOTE(review): the outputs are sized for a single field regardless of
`cfg.howmany` — confirm the expected layout for batched transforms.
"""
function synth(cfg::SHTnsCfg{TR,N,S}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {TR,N,S}
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    @assert length(slm) == length(tlm) == nlm(cfg)*cfg.howmany

    Tv = TR == Real ? Float64 : ComplexF64
    nx, ny = if cfg.shtype.contiguous_phi
        (cfg.nphi, cfg.nlat_padded)
    else
        (cfg.nlat_padded, cfg.nphi)
    end

    utheta = CuMatrix{Tv}(undef, nx, ny)
    uphi = similar(utheta)
    synth!(cfg, slm, tlm, utheta, uphi)
    return utheta, uphi
end
29+
30+
"""
    synth(cfg::SHTnsCfg, qlm, slm, tlm)

Allocate three spatial `CuMatrix` buffers and fill them from the three
`CuVector{ComplexF64}` inputs via `synth!`, returning `(ur, utheta, uphi)`. The
element type is `Float64` when the configuration's first type parameter is
`Real` and `ComplexF64` otherwise. Each matrix is `cfg.nphi × cfg.nlat_padded`
when `cfg.shtype.contiguous_phi` is set and `cfg.nlat_padded × cfg.nphi`
otherwise.

Asserts that `cfg.shtype.gpu` is set, that `cfg.nlat` is nonzero, and that all
three inputs have length `nlm(cfg) * cfg.howmany`.

NOTE(review): the outputs are sized for a single field regardless of
`cfg.howmany` — confirm the expected layout for batched transforms.
"""
function synth(cfg::SHTnsCfg{TR,N,S}, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}) where {TR,N,S}
    @assert cfg.shtype.gpu
    @assert cfg.nlat != 0
    @assert length(qlm) == length(slm) == length(tlm) == nlm(cfg)*cfg.howmany

    Tv = TR == Real ? Float64 : ComplexF64
    nx, ny = if cfg.shtype.contiguous_phi
        (cfg.nphi, cfg.nlat_padded)
    else
        (cfg.nlat_padded, cfg.nphi)
    end

    ur = CuMatrix{Tv}(undef, nx, ny)
    utheta = similar(ur)
    uphi = similar(ur)
    synth!(cfg, qlm, slm, tlm, ur, utheta, uphi)
    return ur, utheta, uphi
end
44+
45+
46+
"""
    synth!(cfg::SHTnsCfg{Real}, qlm::CuVector{ComplexF64}, v::CuMatrix{Float64})

In-place scalar synthesis on the GPU for real-transform configurations: pass
`qlm` and `v` to `cu_SH_to_spat` together with `cfg.cfg` and `cfg.lmax`, then
return `v`.

Asserts that `cfg.shtype.gpu` is set.
"""
function synth!(cfg::SHTnsCfg{Real,N,S}, qlm::CuVector{ComplexF64}, v::CuMatrix{Float64}) where {N,S}
    @assert cfg.shtype.gpu
    handle = cfg.cfg
    ltr = cfg.lmax
    cu_SH_to_spat(handle, qlm, v, ltr)
    return v
end
51+
52+
"""
    synth!(cfg::SHTnsCfg{Real}, slm, tlm, utheta, uphi)

In-place two-component synthesis on the GPU for real-transform configurations:
pass `slm`, `tlm`, `utheta` and `uphi` to `cu_SHsphtor_to_spat` together with
`cfg.cfg` and `cfg.lmax`, then return `(utheta, uphi)`.

`utheta` and `uphi` must share the same concrete `CuMatrix{Float64}` type.
Asserts that `cfg.shtype.gpu` is set.
"""
function synth!(cfg::SHTnsCfg{Real,N,S}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}, utheta::M, uphi::M) where {N,S,M<:CuMatrix{Float64}}
    @assert cfg.shtype.gpu
    handle = cfg.cfg
    ltr = cfg.lmax
    cu_SHsphtor_to_spat(handle, slm, tlm, utheta, uphi, ltr)
    return utheta, uphi
end
57+
58+
"""
    synth!(cfg::SHTnsCfg{Real}, qlm, slm, tlm, ur, utheta, uphi)

In-place three-component synthesis on the GPU for real-transform
configurations: pass the three `CuVector{ComplexF64}` inputs and the three
spatial matrices to `cu_SHqst_to_spat` together with `cfg.cfg` and `cfg.lmax`,
then return `(ur, utheta, uphi)`.

`ur`, `utheta` and `uphi` must share the same concrete `CuMatrix{Float64}`
type. Asserts that `cfg.shtype.gpu` is set.
"""
function synth!(cfg::SHTnsCfg{Real,N,S}, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}, ur::M, utheta::M, uphi::M) where {N,S,M<:CuMatrix{Float64}}
    @assert cfg.shtype.gpu
    handle = cfg.cfg
    ltr = cfg.lmax
    cu_SHqst_to_spat(handle, qlm, slm, tlm, ur, utheta, uphi, ltr)
    return ur, utheta, uphi
end
63+
64+
65+
# Complex-to-complex transforms are not available for CUDA (status: SHTns v3.7).
66+
67+
# function synth_cplx(cfg::SHTnsCfg, qlm::CuVector{ComplexF64})
68+
# @assert cfg.shtype.gpu
69+
# @assert cfg.nlat != 0
70+
# @assert length(qlm) == cfg.nlm_cplx
71+
# @assert cfg.lmax == cfg.mmax
72+
73+
# nx = cfg.shtype.contiguous_phi ? cfg.nphi : cfg.nlat_padded
74+
# ny = cfg.shtype.contiguous_phi ? cfg.nlat_padded : cfg.nphi
75+
76+
# v = CuMatrix{ComplexF64}(undef, nx, ny)
77+
# synth!(cfg, qlm, v)
78+
# return v
79+
# end
80+
81+
# function synth_cplx(cfg::SHTnsCfg, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64})
82+
# @assert cfg.shtype.gpu
83+
# @assert cfg.nlat != 0
84+
# @assert length(slm) == length(tlm) == cfg.nlm_cplx
85+
# @assert cfg.lmax == cfg.mmax
86+
87+
# nx = cfg.shtype.contiguous_phi ? cfg.nphi : cfg.nlat_padded
88+
# ny = cfg.shtype.contiguous_phi ? cfg.nlat_padded : cfg.nphi
89+
90+
# utheta = CuMatrix{ComplexF64}(undef, nx, ny)
91+
# uphi = CuMatrix{ComplexF64}(undef, nx, ny)
92+
# synth!(cfg, slm, tlm, utheta, uphi)
93+
# return utheta, uphi
94+
# end
95+
96+
# function synth_cplx(cfg::SHTnsCfg, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64})
97+
# @assert cfg.shtype.gpu
98+
# @assert cfg.nlat != 0
99+
# @assert length(qlm) == length(slm) == length(tlm) == cfg.nlm_cplx
100+
# @assert cfg.lmax == cfg.mmax
101+
102+
# nx = cfg.shtype.contiguous_phi ? cfg.nphi : cfg.nlat_padded
103+
# ny = cfg.shtype.contiguous_phi ? cfg.nlat_padded : cfg.nphi
104+
105+
# ur = CuMatrix{ComplexF64}(undef, nx, ny)
106+
# utheta = CuMatrix{ComplexF64}(undef, nx, ny)
107+
# uphi = CuMatrix{ComplexF64}(undef, nx, ny)
108+
# synth!(cfg, qlm, slm, tlm, ur, utheta, uphi)
109+
# return ur, utheta, uphi
110+
# end
111+
112+
# function synth!(cfg::SHTnsCfg, qlm::CuVector{ComplexF64}, v::CuMatrix{ComplexF64})
113+
# cu_SH_to_spat_cplx(cfg.cfg, qlm, v, cfg.lmax)
114+
# return v
115+
# end
116+
117+
# function synth!(cfg::SHTnsCfg, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}, utheta::T, uphi::T) where {T<:CuMatrix{ComplexF64}}
118+
# cu_SHsphtor_to_spat_cplx(cfg.cfg, slm, tlm, utheta, uphi, cfg.lmax)
119+
# return utheta, uphi
120+
# end
121+
122+
# function synth!(cfg::SHTnsCfg, qlm::CuVector{ComplexF64}, slm::CuVector{ComplexF64}, tlm::CuVector{ComplexF64}, ur::T, utheta::T, uphi::T) where {T<:CuMatrix{ComplexF64}}
123+
# cu_SHqst_to_spat_cplx(cfg.cfg, qlm, slm, tlm, ur, utheta, uphi, cfg.lmax)
124+
# return ur, utheta, uphi
125+
# end
126+

src/SHTns.jl

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,22 @@ for (type, enumtype) in [(:Gauss, :sht_gauss), (:RegFast, :sht_reg_fast), (:RegD
8282
8383
"""
8484
Base.@kwdef struct $(type)<:SHTnsType
85-
contiguous_lat::Bool=false
85+
contiguous_lat::Bool=true
8686
contiguous_phi::Bool=false
8787
padding::Bool=false
88+
gpu::Bool=false
89+
southpolefirst::Bool=false
90+
float32::Bool=false
8891
end
8992

9093
function Base.convert(::Type{shtns_type}, x::$(type))
9194
shtype = $(enumtype)
92-
x.contiguous_lat && (shtype += SHT_THETA_CONTIGUOUS)
9395
x.contiguous_phi && (shtype += SHT_PHI_CONTIGUOUS)
9496
x.padding && (shtype += SHT_ALLOW_PADDING)
97+
x.gpu && (shtype += SHT_ALLOW_GPU)
98+
x.contiguous_lat && (shtype += SHT_THETA_CONTIGUOUS)
99+
x.southpolefirst && (shtype += SHT_SOUTH_POLE_FIRST)
100+
x.float32 && (shtype += SHT_FP32)
95101
return shtype
96102
end
97103
end
@@ -115,7 +121,7 @@ end
115121
116122
Configuration of spherical harmonic transform.
117123
"""
118-
mutable struct SHTnsCfg{N<:SHTnsNorm, T<:SHTnsType}
124+
mutable struct SHTnsCfg{TR<:Union{Real,Complex}, N<:SHTnsNorm, T<:SHTnsType}
119125
cfg::Ptr{shtns_info}
120126
norm::N
121127
shtype::T
@@ -134,23 +140,32 @@ mutable struct SHTnsCfg{N<:SHTnsNorm, T<:SHTnsType}
134140
st::Vector{Float64}
135141
nlat_padded::Int
136142
nlm_cplx::Int
143+
howmany::Int
137144
function SHTnsCfg(lmax, mmax, mres, nlat, nphi;
138145
shtype::T=QuickInit(),
139146
norm::N=Orthonormal(),
140147
eps=1e-10,
141148
robert_form=false,
149+
howmany = 1,
150+
transform::Union{Type{Real}, Type{Complex}} = Real
142151
) where {T<:SHTnsType, N<:SHTnsNorm}
143152

144153
_init_checks(shtype, lmax, mmax, mres, nlat, nphi)
145154
cfg = shtns_create(lmax, mmax, mres, norm)
146155
robert_form && shtns_robert_form(cfg,1)
156+
if howmany > 1
157+
@assert transform == Real "Only real transform is supported for batched transforms"
158+
info = unsafe_load(cfg)
159+
spec_dist = transform == Real ? info.nlm : info.nlm_cplx
160+
shtns_set_many(cfg, howmany, spec_dist)
161+
end
147162
shtns_set_grid(cfg, shtype, eps, nlat, nphi)
148163
info = unsafe_load(cfg)
149164
li = Vector{Int}(unsafe_wrap(Vector{Cushort},info.li,Int(info.nlm)))
150165
mi = Vector{Int}(unsafe_wrap(Vector{Cushort},info.mi,Int(info.nlm)))
151166
ct = Vector{Float64}(unsafe_wrap(Vector{Cdouble},info.ct,Int(info.nlat)))
152167
st = Vector{Float64}(unsafe_wrap(Vector{Cdouble},info.st,Int(info.nlat)))
153-
stream = new{N,T}(cfg, norm, shtype, robert_form, info.nlm, info.lmax, info.mmax, info.mres, info.nlat_2, info.nlat, info.nphi, info.nspat, li, mi, ct, st, info.nlat_padded, info.nlm_cplx)
168+
stream = new{transform,N,T}(cfg, norm, shtype, robert_form, info.nlm, info.lmax, info.mmax, info.mres, info.nlat_2, info.nlat, info.nphi, info.nspat, li, mi, ct, st, info.nlat_padded, info.nlm_cplx, howmany)
154169
finalizer(x->shtns_destroy(x.cfg), stream)
155170
return stream
156171
end
@@ -160,6 +175,8 @@ mutable struct SHTnsCfg{N<:SHTnsNorm, T<:SHTnsType}
160175
eps=1e-10,
161176
robert_form=false,
162177
nl_order = 0,
178+
howmany = 1,
179+
transform::Union{Type{Real}, Type{Complex}} = Real
163180
) where {T<:SHTnsType, N<:SHTnsNorm}
164181

165182
@assert lmax > 1
@@ -169,13 +186,19 @@ mutable struct SHTnsCfg{N<:SHTnsNorm, T<:SHTnsType}
169186
cfg = shtns_create(lmax, mmax, mres, norm)
170187
robert_form && shtns_robert_form(cfg,1)
171188
info = unsafe_load(cfg)
189+
if howmany > 1
190+
@assert transform == Real "Only real transform is supported for batched transforms"
191+
info = unsafe_load(cfg)
192+
spec_dist = transform == Real ? info.nlm : info.nlm_cplx
193+
shtns_set_many(cfg, howmany, spec_dist)
194+
end
172195
shtns_set_grid_auto(cfg, shtype, eps, nl_order, Ref(info.nlat), Ref(info.nphi))
173196
info = unsafe_load(cfg)
174197
li = Vector{Int}(unsafe_wrap(Vector{Cushort},info.li,Int(info.nlm)))
175198
mi = Vector{Int}(unsafe_wrap(Vector{Cushort},info.mi,Int(info.nlm)))
176199
ct = Vector{Float64}(unsafe_wrap(Vector{Cdouble},info.ct,Int(info.nlat)))
177200
st = Vector{Float64}(unsafe_wrap(Vector{Cdouble},info.st,Int(info.nlat)))
178-
stream = new{N,T}(cfg, norm, shtype, robert_form, info.nlm, info.lmax, info.mmax, info.mres, info.nlat_2, info.nlat, info.nphi, info.nspat, li, mi, ct, st, info.nlat_padded, info.nlm_cplx)
201+
stream = new{transform,N,T}(cfg, norm, shtype, robert_form, info.nlm, info.lmax, info.mmax, info.mres, info.nlat_2, info.nlat, info.nphi, info.nspat, li, mi, ct, st, info.nlat_padded, info.nlm_cplx, howmany)
179202
finalizer(x->shtns_destroy(x.cfg), stream)
180203
return stream
181204
end
@@ -223,6 +246,7 @@ const SHT_SCALAR_ONLY = UInt32(256 * 16)
223246
const SHT_LOAD_SAVE_CFG = UInt32(256 * 64)
224247
const SHT_ALLOW_GPU = UInt32(256 * 128)
225248
const SHT_ALLOW_PADDING = UInt32(256 * 256)
249+
const SHT_FP32 = UInt32(256 * 1024)
226250

227251
include("sht.jl")
228252
include("tools.jl")

0 commit comments

Comments
 (0)