|
defmodule LearnKit.Regression.Polynomial do
  @moduledoc """
  Module for Polynomial Regression algorithm
  """

  defstruct factors: [], results: [], coefficients: [], degree: 2

  alias LearnKit.Regression.Polynomial

  @type factors :: [number]
  @type results :: [number]
  @type coefficients :: [number]
  # The degree is used as a Range bound (`1..degree`, `1..(degree + 1)`),
  # which requires an integer — `number` would wrongly admit floats.
  @type degree :: pos_integer
| 15 | + @doc """ |
| 16 | + Creates polynomial predictor with data_set |
| 17 | +
|
| 18 | + ## Parameters |
| 19 | +
|
| 20 | + - factors: Array of predictor variables |
| 21 | + - results: Array of criterion variables |
| 22 | +
|
| 23 | + ## Examples |
| 24 | +
|
| 25 | + iex> predictor = LearnKit.Regression.Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15]) |
| 26 | + %LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2} |
| 27 | +
|
| 28 | + """ |
| 29 | + def new(factors, results) when is_list(factors) and is_list(results) do |
| 30 | + %Polynomial{factors: factors, results: results} |
| 31 | + end |
| 32 | + |
| 33 | + def new(_, _), do: Polynomial.new([], []) |
| 34 | + def new, do: Polynomial.new([], []) |
| 35 | + |
| 36 | + @doc """ |
| 37 | + Fit train data |
| 38 | +
|
| 39 | + ## Parameters |
| 40 | +
|
| 41 | + - predictor: %LearnKit.Regression.Polynomial{} |
| 42 | + - options: keyword list with options |
| 43 | +
|
| 44 | + ## Options |
| 45 | +
|
| 46 | + - degree: nth degree of polynomial model, default set to 2 |
| 47 | +
|
| 48 | + ## Examples |
| 49 | +
|
| 50 | + iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit |
| 51 | + %LearnKit.Regression.Polynomial{ |
| 52 | + coefficients: [0.9999999999998295, 1.5000000000000853, 0.4999999999999787], |
| 53 | + degree: 2, |
| 54 | + factors: [1, 2, 3, 4], |
| 55 | + results: [3, 6, 10, 15] |
| 56 | + } |
| 57 | +
|
| 58 | + iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit([degree: 3]) |
| 59 | + %LearnKit.Regression.Polynomial{ |
| 60 | + coefficients: [1.0000000000081855, 1.5000000000013642, 0.5, |
| 61 | + 8.526512829121202e-14], |
| 62 | + degree: 3, |
| 63 | + factors: [1, 2, 3, 4], |
| 64 | + results: [3, 6, 10, 15] |
| 65 | + } |
| 66 | +
|
| 67 | + """ |
| 68 | + def fit(%Polynomial{factors: factors, results: results}, options \\ []) do |
| 69 | + degree = options[:degree] || 2 |
| 70 | + matrix = matrix(factors, degree) |
| 71 | + xys = x_y_matrix(factors, results, degree + 1, []) |
| 72 | + coefficients = matrix |> Matrix.inv() |> Matrix.mult(xys) |> List.flatten() |
| 73 | + %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree} |
| 74 | + end |
| 75 | + |
| 76 | + @doc """ |
| 77 | + Predict using the polynomial model |
| 78 | +
|
| 79 | + ## Parameters |
| 80 | +
|
| 81 | + - predictor: %LearnKit.Regression.Polynomial{} |
| 82 | + - samples: Array of variables |
| 83 | +
|
| 84 | + ## Examples |
| 85 | +
|
| 86 | + iex> predictor |> LearnKit.Regression.Polynomial.predict([5,6]) |
| 87 | + {:ok, [20.999999999999723, 27.999999999999574]} |
| 88 | +
|
| 89 | + """ |
| 90 | + def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples) |
| 91 | + when is_list(samples) do |
| 92 | + {:ok, |
| 93 | + Enum.map(samples, fn sample -> |
| 94 | + {:ok, prediction} = predict(polynomial, sample) |
| 95 | + prediction |
| 96 | + end)} |
| 97 | + end |
| 98 | + |
| 99 | + @doc """ |
| 100 | + Predict using the polynomial model |
| 101 | +
|
| 102 | + ## Parameters |
| 103 | +
|
| 104 | + - predictor: %LearnKit.Regression.Polynomial{} |
| 105 | + - sample: Sample variable |
| 106 | +
|
| 107 | + ## Examples |
| 108 | +
|
| 109 | + iex> predictor |> LearnKit.Regression.Polynomial.predict(5) |
| 110 | + {:ok, 20.999999999999723} |
| 111 | +
|
| 112 | + """ |
| 113 | + def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do |
| 114 | + ordered_coefficients = coefficients |> Enum.reverse() |
| 115 | + {:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)} |
| 116 | + end |
| 117 | + |
| 118 | + defp matrix_line(1, factors, degree) do |
| 119 | + power_ofs = Enum.to_list(1..degree) |
| 120 | + |
| 121 | + [Enum.count(factors)] ++ |
| 122 | + Enum.map(power_ofs, fn factor -> |
| 123 | + sum_x_with_k(factors, factor, 0.0) |
| 124 | + end) |
| 125 | + end |
| 126 | + |
| 127 | + defp matrix_line(line, factors, degree) do |
| 128 | + line_factor = line - 1 |
| 129 | + power_ofs = Enum.to_list(line_factor..(degree + line_factor)) |
| 130 | + |
| 131 | + Enum.map(power_ofs, fn factor -> |
| 132 | + sum_x_with_k(factors, factor, 0.0) |
| 133 | + end) |
| 134 | + end |
| 135 | + |
| 136 | + defp matrix(factors, degree) do |
| 137 | + lines = Enum.to_list(1..(degree + 1)) |
| 138 | + |
| 139 | + Enum.map(lines, fn line -> |
| 140 | + matrix_line(line, factors, degree) |
| 141 | + end) |
| 142 | + end |
| 143 | + |
| 144 | + defp substitute_coefficients([], _, _, sum), do: sum |
| 145 | + |
| 146 | + defp substitute_coefficients([coefficient | tail], x, k, sum) do |
| 147 | + sum = sum + :math.pow(x, k) * coefficient |
| 148 | + substitute_coefficients(tail, x, k - 1, sum) |
| 149 | + end |
| 150 | + |
| 151 | + defp sum_x_with_k([x | tail], k, sum) do |
| 152 | + sum = sum + :math.pow(x, k) |
| 153 | + sum_x_with_k(tail, k, sum) |
| 154 | + end |
| 155 | + |
| 156 | + defp sum_x_with_k([], _, sum), do: sum |
| 157 | + |
| 158 | + defp sum_x_y_with_k([], [], _degree, sum), do: [sum] |
| 159 | + |
| 160 | + defp sum_x_y_with_k([x | xtail], [y | ytail], degree, sum) do |
| 161 | + exponent = degree - 1 |
| 162 | + sum = sum + :math.pow(x, exponent) * y |
| 163 | + sum_x_y_with_k(xtail, ytail, degree, sum) |
| 164 | + end |
| 165 | + |
| 166 | + def x_y_matrix(_, _, 0, matrix), do: matrix |> Enum.reverse() |
| 167 | + |
| 168 | + def x_y_matrix(xs, ys, degree, matrix) do |
| 169 | + matrix = matrix ++ [sum_x_y_with_k(xs, ys, degree, 0.0)] |
| 170 | + x_y_matrix(xs, ys, degree - 1, matrix) |
| 171 | + end |
| 172 | +end |
0 commit comments