Commit cb20cdf

Merge pull request #3 from davidrichey/polynomial_regression
Polynomial Regression Predictor
2 parents 44037e4 + 3e408ec

File tree

9 files changed (+423, -62 lines)


CHANGELOG.md

Lines changed: 4 additions & 1 deletion
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
+### Added
+- Polynomial Regression predictor
+
 ### Modified
 - code refactoring
 
@@ -41,4 +44,4 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [0.1.0] - 2018-11-15
 ### Added
-- K-Nearest Neighbours algorithm
+- K-Nearest Neighbours algorithm

lib/learn_kit/regression/linear.ex

Lines changed: 22 additions & 16 deletions
@@ -8,6 +8,7 @@ defmodule LearnKit.Regression.Linear do
   alias LearnKit.Regression.Linear
 
   use Linear.Calculations
+  use LearnKit.Regression.Score
 
   @type factors :: [number]
   @type results :: [number]
@@ -42,7 +43,8 @@ defmodule LearnKit.Regression.Linear do
   """
   @spec new(factors, results) :: %Linear{factors: factors, results: results, coefficients: []}
 
-  def new(factors, results) when is_list(factors) and is_list(results), do: %Linear{factors: factors, results: results}
+  def new(factors, results) when is_list(factors) and is_list(results),
+    do: %Linear{factors: factors, results: results}
 
   @doc """
   Fit train data
@@ -73,13 +75,19 @@ defmodule LearnKit.Regression.Linear do
       }
 
   """
-  @spec fit(%Linear{factors: factors, results: results}) :: %Linear{factors: factors, results: results, coefficients: coefficients}
-
-  def fit(%Linear{factors: factors, results: results}, options \\ []) when is_list(options) do
+  @spec fit(%Linear{factors: factors, results: results}) :: %Linear{
+          factors: factors,
+          results: results,
+          coefficients: coefficients
+        }
+
+  def fit(linear = %Linear{factors: factors, results: results}, options \\ [])
+      when is_list(options) do
     coefficients =
       Keyword.merge([method: ""], options)
      |> define_method_for_fit()
-      |> do_fit(factors, results)
+      |> do_fit(linear)
+
     %Linear{factors: factors, results: results, coefficients: coefficients}
   end
 
@@ -106,32 +114,30 @@ defmodule LearnKit.Regression.Linear do
   """
   @spec predict(%Linear{coefficients: coefficients}, list) :: {:ok, list}
 
-  def predict(%Linear{coefficients: coefficients}, samples) when is_list(samples) do
+  def predict(linear = %Linear{coefficients: _}, samples) when is_list(samples) do
     {
       :ok,
-      Enum.map(samples, fn sample -> predict_sample(sample, coefficients) end)
+      do_predict(linear, samples)
     }
   end
 
   @doc """
-  Returns the coefficient of determination R^2 of the prediction
+  Predict using the linear model
 
   ## Parameters
 
     - predictor: %LearnKit.Regression.Linear{}
+    - sample: Sample variable
 
   ## Examples
 
-      iex> predictor |> LearnKit.Regression.Linear.score
-      {:ok, 0.9876543209876543}
+      iex> predictor |> LearnKit.Regression.Linear.predict(4)
+      {:ok, 14.5}
 
   """
-  @spec score(%Linear{factors: factors, results: results, coefficients: coefficients}) :: {:ok, number}
+  @spec predict(%Linear{coefficients: coefficients}, number) :: {:ok, number}
 
-  def score(%Linear{factors: factors, results: results, coefficients: coefficients}) do
-    {
-      :ok,
-      calculate_score(coefficients, factors, results)
-    }
+  def predict(%Linear{coefficients: [alpha, beta]}, sample) do
+    {:ok, sample * beta + alpha}
   end
 end
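For reference, the public pipeline is unchanged by this refactor; score/1 and its helpers appear to move into the shared LearnKit.Regression.Score module pulled in via use LearnKit.Regression.Score (that file is among the nine changed but is not shown in this excerpt). A minimal end-to-end sketch, borrowing the toy data from the polynomial docstrings below (chosen because an ordinary least-squares line through it yields predict(4) == 14.5, matching the example above; actual output may differ in the last floating-point digits):

    iex> predictor = LearnKit.Regression.Linear.new([1, 2, 3, 4], [3, 6, 10, 15])
    iex> predictor = LearnKit.Regression.Linear.fit(predictor)
    iex> predictor.coefficients
    [-1.5, 4.0]
    iex> predictor |> LearnKit.Regression.Linear.predict(4)
    {:ok, 14.5}
    iex> predictor |> LearnKit.Regression.Linear.predict([5, 6])
    {:ok, [18.5, 22.5]}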

lib/learn_kit/regression/linear/calculations.ex

Lines changed: 50 additions & 39 deletions
@@ -4,60 +4,59 @@ defmodule LearnKit.Regression.Linear.Calculations do
   """
 
   alias LearnKit.Math
+  alias LearnKit.Regression.Linear
 
   defmacro __using__(_opts) do
     quote do
-      defp do_fit(method, factors, results) when method == "gradient descent" do
-        gradient_descent_iteration([:rand.uniform, :rand.uniform], 0.0001, nil, 1000000, Enum.zip(factors, results), 0)
+      defp do_fit(method, %Linear{factors: factors, results: results})
+           when method == "gradient descent" do
+        gradient_descent_iteration(
+          [:rand.uniform(), :rand.uniform()],
+          0.0001,
+          nil,
+          1_000_000,
+          Enum.zip(factors, results),
+          0
+        )
       end
 
-      defp do_fit(_, factors, results) do
-        beta = Math.correlation(factors, results) * Math.standard_deviation(results) / Math.standard_deviation(factors)
+      defp do_fit(_, %Linear{factors: factors, results: results}) do
+        beta =
+          Math.correlation(factors, results) * Math.standard_deviation(results) /
+            Math.standard_deviation(factors)
+
         alpha = Math.mean(results) - beta * Math.mean(factors)
         [alpha, beta]
       end
 
-      defp predict_sample(sample, [alpha, beta]) do
-        sample * beta + alpha
+      defp do_predict(linear, samples) do
+        Enum.map(samples, fn sample ->
+          {:ok, prediction} = predict(linear, sample)
+          prediction
+        end)
       end
 
-      defp calculate_score([], _, _), do: raise("There was no fit for model")
-
-      defp calculate_score(coefficients, factors, results) do
-        1.0 - sum_of_squared_errors(coefficients, factors, results) / total_sum_of_squares(results)
-      end
-
-      defp total_sum_of_squares(list) do
-        mean_list = Math.mean(list)
-        Enum.reduce(list, 0, fn x, acc -> acc + :math.pow(x - mean_list, 2) end)
-      end
-
-      defp sum_of_squared_errors(coefficients, factors, results) do
-        Enum.zip(factors, results)
-        |> Enum.reduce(0, fn {xi, yi}, acc -> acc + squared_prediction_error(coefficients, xi, yi) end)
-      end
-
-      defp squared_prediction_error(coefficients, x, y) do
-        coefficients
-        |> prediction_error(x, y)
-        |> :math.pow(2)
-      end
-
-      defp squared_error_gradient(coefficients, x, y) do
-        error_variable = prediction_error(coefficients, x, y)
+      defp squared_error_gradient(linear, x, y) do
+        error_variable = prediction_error(linear, x, y)
+
        [
          -2 * error_variable,
          -2 * error_variable * x
        ]
      end
 
-      defp prediction_error(coefficients, x, y) do
-        y - predict_sample(x, coefficients)
-      end
-
-      defp gradient_descent_iteration(_, _, min_theta, _, _, iterations_with_no_improvement) when iterations_with_no_improvement >= 100, do: min_theta
-
-      defp gradient_descent_iteration(theta, alpha, min_theta, min_value, data, iterations_with_no_improvement) do
+      defp gradient_descent_iteration(_, _, min_theta, _, _, iterations_with_no_improvement)
+           when iterations_with_no_improvement >= 100,
+           do: min_theta
+
+      defp gradient_descent_iteration(
+             theta,
+             alpha,
+             min_theta,
+             min_value,
+             data,
+             iterations_with_no_improvement
+           ) do
        [
          min_theta,
          min_value,
@@ -69,14 +68,26 @@ defmodule LearnKit.Regression.Linear.Calculations do
        data
        |> Enum.shuffle()
        |> Enum.reduce(theta, fn {xi, yi}, acc ->
-          gradient_i = squared_error_gradient(acc, xi, yi)
+          gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
          acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
        end)
-        gradient_descent_iteration(theta, alpha, min_theta, min_value, data, iterations_with_no_improvement)
+
+        gradient_descent_iteration(
+          theta,
+          alpha,
+          min_theta,
+          min_value,
+          data,
+          iterations_with_no_improvement
+        )
      end
 
      defp check_value(data, min_value, theta, min_theta, iterations_with_no_improvement, alpha) do
-        value = Enum.reduce(data, 0, fn {xi, yi}, acc -> acc + squared_prediction_error(theta, xi, yi) end)
+        value =
+          Enum.reduce(data, 0, fn {xi, yi}, acc ->
+            acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
+          end)
+
        cond do
          value < min_value ->
            [theta, value, 0, 0.0001]
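For intuition about the gradient-descent path above: squared_error_gradient differentiates the squared residual (y - (alpha + beta * x))^2 with respect to [alpha, beta], and the reduce over the shuffled data moves theta against that gradient. A self-contained sketch of one such update, using only the formulas visible in this diff (the module and function names here are illustrative, not part of LearnKit):

    defmodule GradientStepSketch do
      # Residual of one observation under theta = [alpha, beta].
      def prediction_error([alpha, beta], x, y), do: y - (alpha + beta * x)

      # Gradient of the squared residual with respect to alpha and beta.
      def squared_error_gradient(theta, x, y) do
        error = prediction_error(theta, x, y)
        [-2 * error, -2 * error * x]
      end

      # One stochastic update: move theta against the gradient,
      # scaled by the learning rate (0.0001 in the diff above).
      def step([alpha, beta] = theta, learning_rate, {x, y}) do
        [g_alpha, g_beta] = squared_error_gradient(theta, x, y)
        [alpha - learning_rate * g_alpha, beta - learning_rate * g_beta]
      end
    end

    # iex> GradientStepSketch.step([0.0, 0.0], 0.0001, {2, 6})
    # [0.0012, 0.0024]

LearnKit wraps this step in gradient_descent_iteration/6 and, as the guard clause shows, stops after 100 consecutive iterations without improvement.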
lib/learn_kit/regression/polynomial.ex

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+defmodule LearnKit.Regression.Polynomial do
+  @moduledoc """
+  Module for Polynomial Regression algorithm
+  """
+
+  defstruct factors: [], results: [], coefficients: [], degree: 2
+
+  alias LearnKit.Regression.Polynomial
+
+  use Polynomial.Calculations
+  use LearnKit.Regression.Score
+
+  @type factors :: [number]
+  @type results :: [number]
+  @type coefficients :: [number]
+  @type degree :: integer
+
+  @doc """
+  Creates polynomial predictor with data_set
+
+  ## Parameters
+
+    - factors: Array of predictor variables
+    - results: Array of criterion variables
+
+  ## Examples
+
+      iex> predictor = LearnKit.Regression.Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
+      %LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2}
+
+  """
+  @spec new(factors, results) :: %Polynomial{
+          factors: factors,
+          results: results,
+          coefficients: [],
+          degree: 2
+        }
+  def new(factors, results) when is_list(factors) and is_list(results) do
+    %Polynomial{factors: factors, results: results}
+  end
+
+  def new(_, _), do: Polynomial.new([], [])
+  def new, do: Polynomial.new([], [])
+
+  @doc """
+  Fit train data
+
+  ## Parameters
+
+    - predictor: %LearnKit.Regression.Polynomial{}
+    - options: keyword list with options
+
+  ## Options
+
+    - degree: nth degree of polynomial model, default set to 2
+
+  ## Examples
+
+      iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit
+      %LearnKit.Regression.Polynomial{
+        coefficients: [0.9999999999998295, 1.5000000000000853, 0.4999999999999787],
+        degree: 2,
+        factors: [1, 2, 3, 4],
+        results: [3, 6, 10, 15]
+      }
+
+      iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit([degree: 3])
+      %LearnKit.Regression.Polynomial{
+        coefficients: [1.0000000000081855, 1.5000000000013642, 0.5,
+         8.526512829121202e-14],
+        degree: 3,
+        factors: [1, 2, 3, 4],
+        results: [3, 6, 10, 15]
+      }
+
+  """
+  @spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{
+          factors: factors,
+          results: results,
+          coefficients: coefficients,
+          degree: degree
+        }
+  def fit(%Polynomial{factors: factors, results: results}, options \\ []) do
+    degree = options[:degree] || 2
+    matrix = matrix(factors, degree)
+    xys = x_y_matrix(factors, results, degree + 1, [])
+    coefficients = matrix |> Matrix.inv() |> Matrix.mult(xys) |> List.flatten()
+    %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
+  end
+
+  @doc """
+  Predict using the polynomial model
+
+  ## Parameters
+
+    - predictor: %LearnKit.Regression.Polynomial{}
+    - samples: Array of variables
+
+  ## Examples
+
+      iex> predictor |> LearnKit.Regression.Polynomial.predict([5,6])
+      {:ok, [20.999999999999723, 27.999999999999574]}
+
+  """
+  @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, list) :: {:ok, list}
+  def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples)
+      when is_list(samples) do
+    {:ok, do_predict(polynomial, samples)}
+  end
+
+  @doc """
+  Predict using the polynomial model
+
+  ## Parameters
+
+    - predictor: %LearnKit.Regression.Polynomial{}
+    - sample: Sample variable
+
+  ## Examples
+
+      iex> predictor |> LearnKit.Regression.Polynomial.predict(5)
+      {:ok, 20.999999999999723}
+
+  """
+  @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, number) :: {:ok, number}
+  def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do
+    ordered_coefficients = coefficients |> Enum.reverse()
+    {:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)}
+  end
+end
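The new predictor mirrors the linear API, so the whole pipeline reads the same way; chaining the docstring examples above gives:

    iex> alias LearnKit.Regression.Polynomial
    iex> Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
    ...> |> Polynomial.fit(degree: 2)
    ...> |> Polynomial.predict([5, 6])
    {:ok, [20.999999999999723, 27.999999999999574]}

Under the hood, fit/2 solves the least-squares system directly: matrix/2 and x_y_matrix/4 (defined in Polynomial.Calculations, which is not part of the hunks shown here) build the system, and the coefficients come from Matrix.inv/1 and Matrix.mult/2, which also explains the small floating-point noise in the docstring coefficients. Assuming the usual normal-equations construction A[i][j] = sum of x^(i+j) and b[i] = sum of y * x^i (an assumption about those helpers, not confirmed by this diff), the degree-2 fit can be checked independently:

    # Hypothetical standalone check of the degree-2 coefficients above.
    xs = [1, 2, 3, 4]
    ys = [3, 6, 10, 15]
    degree = 2

    # A[i][j] = sum of x^(i+j)
    a =
      for i <- 0..degree do
        for j <- 0..degree, do: xs |> Enum.map(&:math.pow(&1, i + j)) |> Enum.sum()
      end

    # b[i] = sum of y * x^i, as a column vector
    b =
      for i <- 0..degree do
        [Enum.zip(xs, ys) |> Enum.map(fn {x, y} -> y * :math.pow(x, i) end) |> Enum.sum()]
      end

    a |> Matrix.inv() |> Matrix.mult(b) |> List.flatten()
    # => coefficients close to [1.0, 1.5, 0.5], i.e. y ≈ 1 + 1.5x + 0.5x²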
