diff --git a/Project.toml b/Project.toml
index 11bc8361..2959561e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,7 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.8"
+version = "0.17"
+
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -15,7 +16,6 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 RecurrenceRelationships = "807425ed-42ea-44d6-a357-6771516d7b2c"
-Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
@@ -29,8 +29,7 @@ FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"
 LazyArrays = "2.2"
-RecurrenceRelationships = "0.1"
-Reexport = "0.2, 1.0"
+RecurrenceRelationships = "0.2"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.7.1, 0.8"
 julia = "1.7"
diff --git a/docs/Project.toml b/docs/Project.toml
index ed412282..169ffdb7 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,4 @@
 [deps]
-ApproxFun = "28f2ccd6-bb30-5033-b560-165f7b14dc2f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
diff --git a/docs/make.jl b/docs/make.jl
index b6a44b03..dd530e8f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -13,7 +13,6 @@ examples = [
     "halfrange.jl",
     "nonlocaldiffusion.jl",
     "padua.jl",
-    "semiclassical.jl",
     "sphere.jl",
     "spinweighted.jl",
     "subspaceangles.jl",
@@ -48,7 +47,6 @@ makedocs(
                         "generated/halfrange.md",
                         "generated/nonlocaldiffusion.md",
                         "generated/padua.md",
-                        "generated/semiclassical.md",
                         "generated/sphere.md",
                         "generated/spinweighted.md",
                         "generated/subspaceangles.md",
diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
deleted file mode 100644
index 9acecaca..00000000
--- a/examples/semiclassical.jl
+++ /dev/null
@@ -1,113 +0,0 @@
-# # Semi-classical Jacobi polynomials
-# In this example, we will consider the semi-classical orthogonal polynomials with respect to the inner product:
-# ```math
-# \langle f, g \rangle = \int_{-1}^1 f(x) g(x) w(x){\rm d} x,
-# ```
-# where $w(x) = w^{(\alpha,\beta,\gamma,\delta,\epsilon)}(x) = (1-x)^\alpha(1+x)^\beta(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$ is a modification of the Jacobi weight.
-# We shall use results from [this paper](https://arxiv.org/abs/2302.08448) to consider these semi-classical orthogonal polynomials as modifications of the orthonormalized Jacobi polynomials $\tilde{P}_n^{(\alpha,\beta)}(x)$.
-
-using ApproxFun, FastTransforms, LazyArrays, LinearAlgebra, Plots, LaTeXStrings
-const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
-!isdir(GENFIGS) && mkdir(GENFIGS)
-plotlyjs()
-
-# We set the five parameters:
-α,β,γ,δ,ϵ = -0.125, -0.25, 0.123, 0.456, 0.789
-
-# We use `ApproxFun` to construct a finite normalized Jacobi series as a proxy for $(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$.
-u = Fun(x->(2+x)^γ*(3+x)^δ*(5-x)^ϵ, NormalizedJacobi(β, α))
-
-# Our working polynomial degree will be:
-n = 100
-
-# We compute the connection coefficients between the modified orthogonal polynomials and the Jacobi polynomials:
-P = plan_modifiedjac2jac(Float64, n+1, α, β, u.coefficients)
-
-# We store the connection to first kind Chebyshev polynomials:
-P1 = plan_jac2cheb(Float64, n+1, α, β; normjac = true)
-
-# We compute the Chebyshev series for the degree-$k\le n$ modified polynomial and its values at the Chebyshev points:
-q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
-qvals = k -> ichebyshevtransform(q(k))
-
-# With the symmetric Jacobi matrix for $\tilde{P}_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
-x = Fun(x->x, NormalizedJacobi(β, α))
-XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
-XQ = FastTransforms.modified_jacobi_matrix(P, XP)
-view(XQ, 1:7, 1:7)
-
-# And we plot:
-x = chebyshevpoints(Float64, n+1, Val(1))
-p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
-         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
-         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
-for k in 1:10
-    λ = eigvals(SymTridiagonal(XQ.dv[1:k], XQ.ev[1:k-1]))
-    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
-    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
-end
-p
-savefig(joinpath(GENFIGS, "semiclassical.html"))
-###```@raw html
-###<object type="text/html" data="../semiclassical.html" style="width:100%;height:400px;"></object>
-###```
-
-# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of these particular semi-classical Jacobi polynomials are a linear combination of at most four polynomials orthogonal with respect to the weight $w^{(\alpha+1,\beta+1,\gamma+1,\delta+1,\epsilon+1)}(x)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
-v = Fun(x->(2+x)^(γ+1)*(3+x)^(δ+1)*(5-x)^(ϵ+1), NormalizedJacobi(β+1, α+1))
-function threshold!(A::AbstractArray, ϵ)
-    for i in eachindex(A)
-        if abs(A[i]) < ϵ A[i] = 0 end
-    end
-    A
-end
-P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
-DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:9, 1:9])
-
-# A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. Given the modified orthogonal polynomial moments implied by the normalized Jacobi series for $u(x)$, we pad this vector to the necessary size and construct the `GramMatrix` with these moments, the multiplication operator, and the constant $\tilde{P}_0^{(\alpha,\beta)}(x)$:
-μ = PaddedVector(u.coefficients, 2n+1)
-x = Fun(x->x, NormalizedJacobi(β, α))
-XP2 = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:2n+1, 1:2n+1]))
-p0 = Fun(NormalizedJacobi(β, α), [1])(0)
-G = GramMatrix(μ, XP2, p0)
-view(G, 1:7, 1:7)
-
-# And compute its cholesky factorization. The upper-triangular Cholesky factor represents the connection between original Jacobi and semi-classical Jacobi as ${\bf P}^{(\alpha,\beta)}(x) = {\bf Q}(x) R$.
-R = cholesky(G).U
-UpperTriangular(view(R, 1:7, 1:7))
-
-# Every else works almost as before, including evaluation on a Chebyshev grid:
-q = k -> lmul!(P1, ldiv!(R, [zeros(k); 1.0; zeros(n-k)]))
-qvals = k -> ichebyshevtransform(q(k))
-
-# Computation of the modified Jacobi matrix:
-XQ1 = FastTransforms.modified_jacobi_matrix(R, XP)
-norm(XQ-XQ1)/norm(XQ)
-
-# Plotting:
-x = chebyshevpoints(Float64, n+1, Val(1))
-p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
-         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
-         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
-for k in 1:10
-    λ = eigvals(SymTridiagonal(XQ1.dv[1:k], XQ1.ev[1:k-1]))
-    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
-    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
-end
-p
-savefig(joinpath(GENFIGS, "semiclassical1.html"))
-###```@raw html
-###<object type="text/html" data="../semiclassical1.html" style="width:100%;height:400px;"></object>
-###```
-
-# And banded differentiation:
-μ′ = PaddedVector(v.coefficients, 2n+1)
-x′ = Fun(x->x, NormalizedJacobi(β+1, α+1))
-XP′ = SymTridiagonal(Symmetric(Multiplication(x′, space(x′))[1:2n+1, 1:2n+1]))
-p0′ = Fun(NormalizedJacobi(β+1, α+1), [1])(0)
-G′ = GramMatrix(μ′, XP′, p0′)
-R′ = cholesky(G′).U
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
-DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:9, 1:9])
diff --git a/examples/sphere.jl b/examples/sphere.jl
index 916975b5..a28a36a2 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -61,7 +61,7 @@ C = [k/(k+1) for k in 0:N]
 c = zeros(N); c[N] = 1
 pts = vec([z(θ, φ)⋅y for θ in θ, φ in φ])
 phi0 = ones(N*M)
-F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
+F = reshape(FastTransforms.clenshaw!(zeros(N*M), c, A, B, C, pts, phi0), N, M)
 
 # We superpose a surface plot of $f$ on top of the grid:
 X = [sinpi(θ)*cospi(φ) for θ in θ, φ in φ]
@@ -91,7 +91,7 @@ U = threshold!(P\V, 400*eps())
 nrm1 = norm(U)
 
 # Similarly, on the tensor product grid, our function samples are:
-Pnxy = FastTransforms.clenshaw!(c, A, B, C, [x⋅y], [1.0], [0.0])[1]
+Pnxy = FastTransforms.clenshaw!([0.0], c, A, B, C, [x⋅y], [1.0])[1]
 F = [(F[n, m] - Pnxy)/(z(θ[n], φ[m])⋅y - x⋅y) for n in 1:N, m in 1:M]
 
 # We superpose a surface plot of $f$ on top of the grid:
@@ -108,7 +108,7 @@ U = threshold!(P\V, 400*eps())
 
 # Finally, the Legendre polynomial $P_n(z\cdot x)$ is aligned with the grid:
 pts = vec([z(θ, φ)⋅x for θ in θ, φ in φ])
-F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
+F = reshape(FastTransforms.clenshaw!(zeros(N*M), c, A, B, C, pts, phi0), N, M)
 
 # We superpose a surface plot of $f$ on top of the grid:
 scatter3d(vec(X), vec(Y), vec(Z); markersize=1.25, markercolor=:violetred)
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 8a3d5a3c..4eef13ad 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,11 +1,11 @@
 module FastTransforms
 
 using ArrayLayouts, BandedMatrices, FastGaussQuadrature, FillArrays, LazyArrays, LinearAlgebra,
-      Reexport, SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
+      SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
 
-@reexport using AbstractFFTs
-@reexport using FFTW
-@reexport using GenericFFT
+using AbstractFFTs
+using FFTW
+using GenericFFT
 
 import Base: convert, unsafe_convert, eltype, ndims, adjoint, transpose, show,
              *, \, inv, length, size, view, getindex, tail, OneTo
@@ -34,11 +34,8 @@ import LinearAlgebra: cholesky, issymmetric, isposdef, mul!, lmul!, ldiv!
 
 import GenericFFT: interlace # imported in downstream packages
 
-import RecurrenceRelationships: clenshaw!, check_clenshaw_recurrences
+import RecurrenceRelationships: check_clenshaw_recurrences
 
-const _forwardrecurrence! = RecurrenceRelationships.forwardrecurrence!
-const _clenshaw_next = RecurrenceRelationships.clenshaw_next
-const _forwardrecurrence_next = RecurrenceRelationships.forwardrecurrence_next
 
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index d89f0490..3ce492d9 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -49,13 +49,13 @@ function renew!(x::AbstractArray{BigFloat})
     return x
 end
 
-function horner!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+function horner!(f::Vector{Float64}, c::StridedVector{Float64}, x::Vector{Float64})
     @assert length(x) == length(f)
     ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function horner!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+function horner!(f::Vector{Float32}, c::StridedVector{Float32}, x::Vector{Float32})
     @assert length(x) == length(f)
     ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
@@ -69,19 +69,19 @@ function check_clenshaw_points(x, f)
     length(x) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
 
-function clenshaw!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+function clenshaw!(f::Vector{Float64}, c::StridedVector{Float64}, x::Vector{Float64})
     @boundscheck check_clenshaw_points(x, f)
     ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+function clenshaw!(f::Vector{Float32}, c::StridedVector{Float32}, x::Vector{Float32})
     @boundscheck check_clenshaw_points(x, f)
     ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64}, f::Vector{Float64})
+function clenshaw!(f::Vector{Float64}, c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
@@ -89,7 +89,7 @@ function clenshaw!(c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Floa
     f
 end
 
-function clenshaw!(c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32}, f::Vector{Float32})
+function clenshaw!(f::Vector{Float32}, c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index de9d5f78..545da7d9 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -8,10 +8,10 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         c = one(T) ./ (1:n)
         x = collect(-1 .+ 2*(0:n-1)/T(n))
         f = similar(x)
-        @test FastTransforms.horner!(c, x, f) == f
+        @test FastTransforms.horner!(f, c, x) == f
         fd = T[sum(c[k]*x^(k-1) for k in 1:length(c)) for x in x]
         @test f ≈ fd
-        @test FastTransforms.clenshaw!(c, x, f) == f
+        @test FastTransforms.clenshaw!(f, c, x) == f
         fd = T[sum(c[k]*cos((k-1)*acos(x)) for k in 1:length(c)) for x in x]
         @test f ≈ fd
         A = T[(2k+one(T))/(k+one(T)) for k in 0:length(c)-1]
@@ -19,7 +19,7 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         C = T[k/(k+one(T)) for k in 0:length(c)]
         phi0 = ones(T, length(x))
         c = FastTransforms.lib_cheb2leg(c)
-        @test FastTransforms.clenshaw!(c, A, B, C, x, phi0, f) == f
+        @test FastTransforms.clenshaw!(f, c, A, B, C, x, phi0) == f
         @test f ≈ fd
     end