CliMA
diff --git a/‎.travis.yml‎
Lines changed: 5 additions & 0 deletions b/‎.travis.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎Project.toml‎
Lines changed: 4 additions & 0 deletions b/‎Project.toml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/GPR/main.jl‎
Lines changed: 51 additions & 0 deletions b/‎examples/GPR/main.jl‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎src/GPR.jl‎
Lines changed: 202 additions & 0 deletions b/‎src/GPR.jl‎
Lines changed: 202 additions & 0 deletions
diff --git a/‎src/Solus.jl‎
Lines changed: 1 addition & 0 deletions b/‎src/Solus.jl‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎test/GPR/data/data_points.npy‎
12.6 KB b/‎test/GPR/data/data_points.npy‎
12.6 KB
diff --git a/‎test/GPR/data/matern_05_mean.npy‎
7.89 KB b/‎test/GPR/data/matern_05_mean.npy‎
7.89 KB
diff --git a/‎test/GPR/data/matern_05_std.npy‎
7.89 KB b/‎test/GPR/data/matern_05_std.npy‎
7.89 KB
diff --git a/‎test/GPR/data/matern_def_mean.npy‎
7.89 KB b/‎test/GPR/data/matern_def_mean.npy‎
7.89 KB
diff --git a/‎test/GPR/data/matern_def_std.npy‎
7.89 KB b/‎test/GPR/data/matern_def_std.npy‎
7.89 KB
@@ -1,5 +1,10 @@
 # Documentation: http://docs.travis-ci.com/user/languages/julia/
 language: julia
+
+env:
+  global:
+    - PYTHON=Conda
+
 os:
   - linux
   - osx
 
@@ -6,8 +6,12 @@ version = "0.1.0"
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
+ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [extras]
 DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 
@@ -0,0 +1,51 @@
+#!/usr/bin/julia --
+
+import NPZ
+import PyPlot
+const plt = PyPlot
+
+include("../../src/GPR.jl")
+
+# Resolve the data file relative to this script and not to the current working
+# directory, so that the example can be launched from any directory
+const gpr_data = NPZ.npzread(
+    joinpath(@__DIR__, "..", "..", "test", "GPR", "data", "data_points.npy"))
+
+################################################################################
+# main section #################################################################
+################################################################################
+# To avoid bloated and annoying warnings from ScikitLearn.jl, run this with
+#
+# julia --depwarn=no main.jl
+#
+
+# create an instance of GPR.Wrap with threshold set to -1
+# by convention, negative threshold means use all data, i.e. do not subsample
+gprw = GPR.Wrap(thrsh = -1)
+#gprw = GPR.Wrap()
+
+GPR.set_data!(gprw, gpr_data)
+
+GPR.subsample!(gprw) # this form is unnecessary, `learn!` will do it anyway
+#GPR.subsample!(gprw, 500) # ignore `gprw.thrsh` and subsample 500 points
+#GPR.subsample!(gprw, indices = 1:10) # subsample points with indices `indices`
+
+GPR.learn!(gprw) # fit GPR with "Const * RBF + White" kernel
+#GPR.learn!(gprw, noise = 1e-8) # `noise` is *non-optimized* additional noise
+#GPR.learn!(gprw, kernel = "matern") # "rbf" and "matern" are supported for now
+#GPR.learn!(gprw, kernel = "matern", nu = 1) # Matern's parameter nu; 1.5 by def
+
+# regular grid with step 0.01 spanning the range of the first data column
+mesh = minimum(gpr_data, dims=1)[1] : 0.01 : maximum(gpr_data, dims=1)[1]
+
+mean, std = GPR.predict(gprw, mesh, return_std = true)
+#mean = GPR.predict(gprw, mesh) # `return_std` is false by default
+
+################################################################################
+# plot section #################################################################
+################################################################################
+plt.plot(gpr_data[:,1], gpr_data[:,2], "r.", ms = 6, label = "Data points")
+plt.plot(mesh, mean, "k", lw = 2.5, label = "GPR mean")
+# shaded band of two standard deviations around the mean
+plt.fill_between(mesh, mean - 2*std, mean + 2*std, alpha = 0.4, zorder = 10,
+                 color = "k", label = "95% interval")
+
+plt.legend()
+plt.show()
+
+
@@ -0,0 +1,202 @@
+module GPR
+"""
+For the time being, please use `include("src/GPR.jl")` and not `using Solus.GPR`
+since there are precompile issues with the backend (scikit-learn)
+"""
+
+using Parameters # lets you have defaults for fields
+
+using EllipsisNotation # adds '..' to refer to the rest of array
+import ScikitLearn
+import StatsBase
+const sklearn = ScikitLearn
+
+sklearn.@sk_import gaussian_process : GaussianProcessRegressor
+sklearn.@sk_import gaussian_process.kernels : (RBF, Matern, WhiteKernel)
+
+
+"""
+A simple struct to handle Gaussian Process Regression related stuff
+
+Functions that operate on GPR.Wrap struct:
+ - set_data!(gprw, data)
+ - subsample!(gprw), subsample!(gprw, thrsh), subsample!(gprw, indices = ...)
+ - learn!(gprw; kernel, noise, nu)
+ - predict(gprw, x; return_std)
+
+Do *not* set Wrap's variables except for `thrsh`; use setter functions!
+
+Fields:
+  - thrsh:             number of points drawn by `subsample!(gprw)`; a negative
+                       value means "use all data", zero is rejected
+  - data:              array stored by `set_data!`; `nothing` until then
+  - subsample:         view into `data` created by `subsample!`; reset to
+                       `nothing` by `set_data!`
+  - GPR:               regressor created and fitted by `learn!`; reset to
+                       `nothing` by `set_data!`
+  - __data_set:        whether `set_data!` has been called
+  - __subsample_set:   whether `subsample` is valid for the current `data`
+"""
+@with_kw mutable struct Wrap
+  thrsh::Int = 500
+  data = nothing
+  subsample = nothing
+  GPR = nothing
+  __data_set::Bool = false
+  __subsample_set::Bool = false
+end
+
+################################################################################
+# GPR.Wrap-related functions ###################################################
+################################################################################
+"""
+Store `data` in `gprw` and invalidate everything derived from older data
+
+`gprw.subsample` and `gprw.GPR` are reset to `nothing`, so a fresh subsample
+and a fresh fit are required after every call -- very important!
+
+Parameters:
+  - gprw:        an instance of GPR.Wrap
+  - data:        input data to learn from (at least 2-dimensional)
+
+`data` should be in the following format:
+  last column: values/labels/y values
+  first column(s): locations/x values
+
+If `data` has more than two dimensions, a warning is printed and only the
+slice with index 1 along every extra dimension is kept. If it has fewer than
+two dimensions, an error is raised and `gprw` is left untouched
+"""
+function set_data!(gprw::Wrap, data::Array{<:Real})
+  nd = ndims(data)
+  if nd < 2
+    error("set_data!: ndims(data) < 2; cannot proceed")
+  elseif nd > 2
+    println(warn("set_data!"), "ndims(data) > 2; will use the first two dims")
+    # index 1 along each trailing dimension leaves a plain 2-dimensional array
+    data = data[:, :, ones(Int, nd - 2)...]
+  end
+
+  gprw.data = data
+  # whatever was derived from the previous data is stale now
+  gprw.subsample = nothing
+  gprw.GPR = nothing
+  gprw.__subsample_set = false
+  gprw.__data_set = true
+
+  println(name("set_data!"), size(gprw.data,1), " points")
+  flush(stdout)
+end
+
+"""
+Subsample `gprw.data`, either with explicit `indices` or using `gprw.thrsh`
+
+Parameters:
+  - gprw:        an instance of GPR.Wrap
+  - indices:     (keyword) indices that will be used to subsample `gprw.data`;
+                 when omitted, this is `subsample!(gprw, gprw.thrsh)`
+
+Both call forms, `subsample!(gprw, indices = ...)` and `subsample!(gprw)`, are
+served by this single method on purpose: keyword arguments do not take part in
+dispatch, so two separate methods would share one signature and the one
+defined later would overwrite the other
+"""
+function subsample!(gprw::Wrap;
+                    indices::Union{Nothing, Array{Int,1}, UnitRange{Int}} = nothing)
+  if indices === nothing
+    return subsample!(gprw, gprw.thrsh)
+  end
+  if !gprw.__data_set
+    error("subsample!: 'data' is not set, cannot sample")
+  end
+
+  # a view, not a copy: the subsample shares memory with `gprw.data`
+  gprw.subsample = @view(gprw.data[indices,..])
+  gprw.__subsample_set = true
+  println(name("subsample!"), size(gprw.subsample,1), " subsampled")
+  flush(stdout)
+end
+
+"""
+Draw `thrsh` subsamples from `gprw.data`
+
+Parameters:
+  - gprw:        an instance of GPR.Wrap
+  - thrsh:       threshold for the maximum number of points used in subsampling
+
+If `thrsh` > 0 and `thrsh` < number of `gprw.data` points:
+  subsample `thrsh` points uniformly randomly (without replacement) from
+  `gprw.data`
+If `thrsh` > 0 and `thrsh` >= number of `gprw.data` points:
+  no subsampling, use whole `gprw.data`
+If `thrsh` < 0:
+  no subsampling, use whole `gprw.data`
+If `thrsh` == 0, or if no data has been set:
+  an error is raised
+
+This function ignores `gprw.thrsh`
+"""
+function subsample!(gprw::Wrap, thrsh::Int)
+  if !gprw.__data_set
+    error("subsample!: 'data' is not set, cannot sample")
+  end
+  if thrsh == 0
+    error("subsample!: 'thrsh' == 0, cannot sample")
+  end
+
+  N = size(gprw.data,1)
+  # a negative threshold, or one that is not below N, selects every point
+  if 0 < thrsh < N
+    inds = StatsBase.sample(1:N, thrsh, replace = false)
+  else
+    inds = 1:N
+  end
+
+  subsample!(gprw, indices = inds)
+end
+
+"""
+Fit a GP regressor to the current subsample of `gprw.data`
+
+If no subsample has been taken yet, `subsample!(gprw)` is called first. The
+regressor is stored in `gprw.GPR`, and its kernel after fitting is printed
+
+Parameters:
+  - gprw:        an instance of GPR.Wrap
+  - kernel:      "rbf" or "matern"; "rbf" by default; any other value falls
+                 back to "rbf" with a warning
+  - noise:       non-optimized noise level, passed to the regressor as `alpha`
+                 for either kernel (in addition to the optimized white noise)
+  - nu:          Matern's nu parameter (smoothness of functions); only used
+                 when `kernel` is "matern"
+
+The last column of the subsample is used as the values to fit, all the other
+columns as the locations
+"""
+function learn!(gprw::Wrap; kernel::String = "rbf", noise = 0.5, nu = 1.5)
+  if !gprw.__subsample_set
+    println(warn("learn!"), "'subsample' is not set; attempting to set...")
+    subsample!(gprw)
+  end
+
+  white = WhiteKernel(1, (1e-10, 10))
+  if kernel != "matern" && kernel != "rbf"
+    println(warn("learn!"), "Kernel '", kernel, "' is not supported; ",
+            "falling back to RBF")
+  end
+  # everything that is not "matern" ends up with the RBF kernel
+  base = kernel == "matern" ? Matern(length_scale = 1.0, nu = nu) :
+                              RBF(1.0, (1e-10, 1e+6))
+  composite = 1.0 * base + white
+
+  gprw.GPR = GaussianProcessRegressor(
+      kernel = composite,
+      n_restarts_optimizer = 7,
+      alpha = noise
+      )
+
+  locations = gprw.subsample[:,1:end-1]
+  values = gprw.subsample[:,end]
+  sklearn.fit!(gprw.GPR, locations, values)
+
+  println(name("learn!"), gprw.GPR.kernel_)
+  flush(stdout)
+end
+
+"""
+Return mean (and st. deviation) values
+
+Parameters:
+  - gprw:        an instance of GPR.Wrap
+  - x:           data for prediction; a vector is treated as a single column
+                 of locations
+  - return_std:  boolean flag, whether to return st. deviation
+
+Returns:
+  - mean:        mean of the GP regressor at `x` locations
+  - (mean, std): mean and st. deviation if `return_std` flag is true
+
+An error is raised if `gprw.GPR` is not set, i.e. if `learn!` has not been
+called since the last `set_data!`
+"""
+function predict(gprw::Wrap, x; return_std = false)
+  if gprw.GPR === nothing
+    error("predict: 'GPR' is not set; call `learn!` first")
+  end
+
+  # add an extra dimension to `x` if it's a vector (scikit-learn's whim)
+  locations = ndims(x) == 1 ? reshape(x, (size(x)...,1)) : x
+  return gprw.GPR.predict(locations, return_std = return_std)
+end
+
+################################################################################
+# convenience functions ########################################################
+################################################################################
+# width to which the message prefixes built by `name` and `warn` are padded
+const RPAD = 25
+
+# Build the prefix for regular messages: `name` followed by a colon, padded
+# with spaces on the right up to `RPAD` characters
+name(name::AbstractString) = rpad(string(name, ":"), RPAD)
+
+# Build the prefix for warning messages: "WARNING (<name>):", padded with
+# spaces on the right up to `RPAD` characters
+warn(name::AbstractString) = rpad(string("WARNING (", name, "):"), RPAD)
+
+end # module
+
+
@@ -5,5 +5,6 @@ using Distributions, Statistics, LinearAlgebra, DocStringExtensions
 include("spaces.jl")
 include("problems.jl")
 include("neki.jl")
+include("GPR.jl")
 
 end # module