first draft

FelixBenning · FelixBenning · commit 7e2b8dcd12f2 · 2023-05-17T07:12:22.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -8,10 +8,12 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 CompositionsBase = "a33af91c-f02d-484b-be07-31d278c5ca2b"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
diff --git a/src/KernelFunctions.jl b/src/KernelFunctions.jl
@@ -125,6 +125,7 @@ include("chainrules.jl")
 include("zygoterules.jl")
 
 include("TestUtils.jl")
+include("diffKernel.jl")
 
 function __init__()
     @require Kronecker = "2c470bb0-bcc8-11e8-3dad-c9649493f05e" begin
diff --git a/src/diffKernel.jl b/src/diffKernel.jl
@@ -0,0 +1,108 @@
+using OneHotArrays: OneHotVector
+import ForwardDiff as FD
+import LinearAlgebra as LA
+
+""" 
+	DiffPt(x; partial=())
+
+For a covariance kernel k of GP Z, i.e.
+```julia
+	k(x,y) # = Cov(Z(x), Z(y)),
+```
+a DiffPt allows the differentiation of Z, e.g.
+```julia
+	k(DiffPt(x, partial=1), y) # = Cov(∂₁Z(x), Z(y))
+```
+For higher-order derivatives, `partial` can be any iterable of indices, e.g.
+```julia
+	k(DiffPt(x, partial=(1,2)), y) # = Cov(∂₁∂₂Z(x), Z(y))
+```
+"""
+struct DiffPt{Dim} # NOTE(review): both fields are untyped (i.e. `Any`); consider parametrising them
+	pos # the actual position (handed to the kernel when it is evaluated)
+	partial # indices of the partial derivatives to take at `pos`; iterated over, empty means none
+end
+
+DiffPt(x;partial=()) = DiffPt{length(x)}(x, partial) # convenience constructor: Dim is set to length(x)
+
+"""
+Take the partial derivative of a function `fun` with input dimension `dim`.
+If `partials=(i,j)`, then `∂ᵢ∂ⱼ fun` is returned; empty `partials` returns `fun` itself.
+"""
+function partial(fun, dim, partials) # no `=()` default: `partial(k, dim; ...)` owns the 2-argument call
+	if !isnothing(local next = iterate(partials))
+		idx, state = next # idx: axis to differentiate along, state: iterator state for the rest
+		return partial(
+			x -> FD.derivative(0) do dx # derivative w.r.t. the step size dx, taken at dx = 0
+				fun(x .+ dx * OneHotVector(idx, dim)) # move x by dx along axis idx
+			end,
+			dim,
+			Base.rest(partials, state), # the remaining indices are applied recursively
+		)
+	end
+	return fun # no partials left: nothing to differentiate
+end
+
+"""
+Take partial derivatives of a function `k` with two `dim`-dimensional inputs (i.e. a
+2*dim-dimensional input): `partials_x` applies to the first input, `partials_y` to the second.
+"""
+function partial(k, dim; partials_x=(), partials_y=())
+	dx_k = (x, y) -> partial(s -> k(s, y), dim, partials_x)(x) # differentiate in the first argument
+	return (x, y) -> partial(s -> dx_k(x, s), dim, partials_y)(y) # then in the second argument
+end
+
+
+
+
+"""
+	_evaluate(k::T, x::DiffPt{Dim}, y::DiffPt{Dim}) where {Dim, T<:Kernel}
+
+implements `(k::T)(x::DiffPt{Dim}, y::DiffPt{Dim})` for all kernel types. But since
+generics are not allowed in the syntax above by the dispatch system, this
+redirection over `_evaluate` is necessary
+
+Unboxes the partial-derivative instructions from both DiffPts, applies them to k,
+and evaluates the resulting function at the positions stored in the DiffPts.
+"""
+function _evaluate(k::T, x::DiffPt{Dim}, y::DiffPt{Dim}) where {Dim, T<:Kernel}
+	# build the differentiated kernel from the partials carried by both points,
+	# then evaluate it at the underlying positions
+	dk = partial(k, Dim; partials_x=x.partial, partials_y=y.partial)
+	return dk(x.pos, y.pos)
+end
+
+
+
+#=
+This is a hack to work around the fact that the `where {T<:Kernel}` clause is
+not allowed for the `(::T)(x,y)` syntax. If we were to only implement
+```julia
+	(::Kernel)(::DiffPt,::DiffPt)
+```
+then Julia would not know whether to use
+`(::SpecialKernel)(x,y)` or `(::Kernel)(x::DiffPt, y::DiffPt)`, because neither
+method is more specific than the other (the call would be ambiguous).
+To avoid this hack, no kernel type T should implement
+```julia
+	(::T)(x,y)
+```
+and instead implement
+```julia
+	_evaluate(k::T, x, y)
+```
+Then there should be only a single
+```julia
+	(k::Kernel)(x,y) = _evaluate(k, x, y)
+```
+which all the kernels would fall back to.
+
+This ensures that `_evaluate(k::T, x::DiffPt{Dim}, y::DiffPt{Dim})` is always the
+more specialized method and is therefore the one that gets called.
+=#
+for T in [SimpleKernel, Kernel] #subtypes(Kernel) -- forward every DiffPt call to `_evaluate`
+	(k::T)(x::DiffPt{Dim}, y::DiffPt{Dim}) where {Dim} = _evaluate(k, x, y)
+	(k::T)(x::DiffPt{Dim}, y) where {Dim} = _evaluate(k, x, DiffPt(y)) # plain y: wrapped without partials
+	(k::T)(x, y::DiffPt{Dim}) where {Dim} = _evaluate(k, DiffPt(x), y) # plain x: wrapped without partials
+end
+
diff --git a/test/diffKernel.jl b/test/diffKernel.jl
@@ -0,0 +1,25 @@
+@testset "diffKernel" begin
+	@testset "smoke test" begin
+		kernel = MaternKernel()
+		kernel(1, 1) # plain call, no DiffPt involved
+		kernel(1, DiffPt(1; partial=(1, 1))) # Cov(Z(x), ∂₁∂₁Z(y)) where x=1, y=1
+		kernel(DiffPt([1]; partial=1), [2]) # Cov(∂₁Z(x), Z(y)) where x=[1], y=[2]
+		kernel(DiffPt([1, 2]; partial=1), DiffPt([1, 2]; partial=2)) # Cov(∂₁Z(x), ∂₂Z(y)) where x=[1,2], y=[1,2]
+	end
+
+	@testset "Sanity Checks with $k" for k in [MaternKernel()]
+		for x in [0, 1, -1, 42]
+			# for stationary kernels Cov(∂Z(x), Z(x)) = 0; an explicit atol is required
+			# because `≈ 0` with the default tolerances (atol = 0) degenerates to `== 0`
+			@test k(DiffPt(x, partial=1), x) ≈ 0 atol=1e-12
+
+			# the slope should be positively correlated with a point further down
+			slope = DiffPt(x, partial=1)
+			further_down = x + 1e-10
+			@test k(slope, further_down) > 0
+
+			# correlation with self should be positive
+			@test k(slope, slope) > 0
+		end
+	end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -176,6 +176,7 @@ include("test_utils.jl")
         include("generic.jl")
         include("chainrules.jl")
         include("zygoterules.jl")
+        include("diffKernel.jl")
 
         @testset "doctests" begin
             DocMeta.setdocmeta!(