
Commit 4f79d05

code refactoring, release 0.1.6
1 parent cb20cdf commit 4f79d05


17 files changed, +74 -144 lines changed


CHANGELOG.md

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
-## Unreleased
+## [0.1.6] - 2019-01-08
 ### Added
 - Polynomial Regression predictor

README.md

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@ Available preprocessing methods:
 Available algorithms for prediction:
 
 - Linear Regression
+- Polynomial Regression
 
 Available algorithms for classification:
 
@@ -23,7 +24,7 @@ by adding `learn_kit` to your list of dependencies in `mix.exs`:
 ```elixir
 def deps do
   [
-    {:learn_kit, "~> 0.1.5"}
+    {:learn_kit, "~> 0.1.6"}
   ]
 end
 ```

lib/learn_kit/knn.ex

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@ defmodule LearnKit.Knn do
   defstruct data_set: []
 
   alias LearnKit.Knn
-
   use Knn.Classify
 
   @type label :: atom

lib/learn_kit/knn/classify.ex

Lines changed: 3 additions & 9 deletions
@@ -45,14 +45,10 @@ defmodule LearnKit.Knn.Classify do
       end
 
      # sort distances
-      defp sort_distances(features) do
-        Enum.sort(features, &(elem(&1, 0) <= elem(&2, 0)))
-      end
+      defp sort_distances(features), do: Enum.sort(features, &(elem(&1, 0) <= elem(&2, 0)))
 
      # take closest features
-      defp select_closest_features(features, options) do
-        Enum.take(features, options[:k])
-      end
+      defp select_closest_features(features, options), do: Enum.take(features, options[:k])
 
      # check existeness of current feature in data set
      defp check_zero_distance(closest_features, options) do
@@ -146,9 +142,7 @@
         end
       end
 
-      defp accumulate_weight_of_labels([], acc) do
-        acc
-      end
+      defp accumulate_weight_of_labels([], acc), do: acc
 
      defp accumulate_weight_of_labels([{_, key, weight} | tail], acc) do
        previous = if Keyword.has_key?(acc, key), do: acc[key], else: 0

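Most hunks in this commit apply the same pattern: a trivial multi-line function is collapsed into Elixir's single-line keyword `do:` form. A generic sketch (not code from the library) showing that the two spellings define the same function:

```elixir
defmodule Example do
  # Block form
  def double_block(x) do
    x * 2
  end

  # Equivalent single-line keyword form used throughout this commit
  def double_inline(x), do: x * 2
end

Example.double_block(21) == Example.double_inline(21)
# => true
```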
lib/learn_kit/naive_bayes/gaussian.ex

Lines changed: 2 additions & 7 deletions
@@ -6,7 +6,6 @@ defmodule LearnKit.NaiveBayes.Gaussian do
   defstruct data_set: [], fit_data: []
 
   alias LearnKit.NaiveBayes.Gaussian
-
   use Gaussian.Normalize
   use Gaussian.Fit
   use Gaussian.Classify
@@ -34,9 +33,7 @@
   """
   @spec new() :: %Gaussian{data_set: []}
 
-  def new do
-    Gaussian.new([])
-  end
+  def new, do: Gaussian.new([])
 
   @doc """
   Creates classifier with data_set
@@ -53,9 +50,7 @@
   """
   @spec new(data_set) :: %Gaussian{data_set: data_set}
 
-  def new(data_set) do
-    %Gaussian{data_set: data_set}
-  end
+  def new(data_set), do: %Gaussian{data_set: data_set}
 
   @doc """
   Add train data to classifier

lib/learn_kit/preprocessing.ex

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@ defmodule LearnKit.Preprocessing do
   """
 
   alias LearnKit.{Preprocessing, Math}
-
   use Preprocessing.Normalize
 
   @type row :: [number]

lib/learn_kit/regression/linear.ex

Lines changed: 6 additions & 12 deletions
@@ -6,7 +6,6 @@ defmodule LearnKit.Regression.Linear do
   defstruct factors: [], results: [], coefficients: []
 
   alias LearnKit.Regression.Linear
-
   use Linear.Calculations
   use LearnKit.Regression.Score
 
@@ -43,8 +42,9 @@
   """
   @spec new(factors, results) :: %Linear{factors: factors, results: results, coefficients: []}
 
-  def new(factors, results) when is_list(factors) and is_list(results),
-    do: %Linear{factors: factors, results: results}
+  def new(factors, results) when is_list(factors) and is_list(results) do
+    %Linear{factors: factors, results: results}
+  end
 
   @doc """
   Fit train data
@@ -75,19 +75,13 @@
       }
 
   """
-  @spec fit(%Linear{factors: factors, results: results}) :: %Linear{
-          factors: factors,
-          results: results,
-          coefficients: coefficients
-        }
-
-  def fit(linear = %Linear{factors: factors, results: results}, options \\ [])
-      when is_list(options) do
+  @spec fit(%Linear{factors: factors, results: results}) :: %Linear{factors: factors, results: results, coefficients: coefficients}
+
+  def fit(linear = %Linear{factors: factors, results: results}, options \\ []) when is_list(options) do
     coefficients =
       Keyword.merge([method: ""], options)
      |> define_method_for_fit()
      |> do_fit(linear)
-
    %Linear{factors: factors, results: results, coefficients: coefficients}
  end
 

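A hedged usage sketch of `fit/2`, assembled from the signatures above: the sample data is illustrative, and the `method: "gradient descent"` option value is inferred from the `Keyword.merge([method: ""], options)` default here and the `do_fit("gradient descent", ...)` clause in the next file, since `define_method_for_fit/1` itself is not part of this diff.

```elixir
alias LearnKit.Regression.Linear

# Illustrative train data, not taken from the project docs
predictor = Linear.new([1, 2, 3, 4], [3, 6, 10, 15])

# Default fit: closed-form alpha/beta from correlation and standard deviations
fitted = Linear.fit(predictor)

# Assumed option for the gradient descent clause of do_fit/2
fitted_gd = Linear.fit(predictor, method: "gradient descent")

fitted.coefficients
# => [alpha, beta]
```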
lib/learn_kit/regression/linear/calculations.ex

Lines changed: 31 additions & 48 deletions
@@ -3,13 +3,11 @@ defmodule LearnKit.Regression.Linear.Calculations do
   Module for fit functions
   """
 
-  alias LearnKit.Math
-  alias LearnKit.Regression.Linear
+  alias LearnKit.{Math, Regression.Linear}
 
   defmacro __using__(_opts) do
     quote do
-      defp do_fit(method, %Linear{factors: factors, results: results})
-           when method == "gradient descent" do
+      defp do_fit("gradient descent", %Linear{factors: factors, results: results}) do
         gradient_descent_iteration(
           [:rand.uniform(), :rand.uniform()],
           0.0001,
@@ -21,10 +19,7 @@ defmodule LearnKit.Regression.Linear.Calculations do
       end
 
       defp do_fit(_, %Linear{factors: factors, results: results}) do
-        beta =
-          Math.correlation(factors, results) * Math.standard_deviation(results) /
-            Math.standard_deviation(factors)
-
+        beta = calc_beta(factors, results)
         alpha = Math.mean(results) - beta * Math.mean(factors)
         [alpha, beta]
       end
@@ -36,66 +31,54 @@ defmodule LearnKit.Regression.Linear.Calculations do
         end)
       end
 
+      defp calc_beta(factors, results) do
+        Math.correlation(factors, results) * Math.standard_deviation(results) / Math.standard_deviation(factors)
+      end
+
       defp squared_error_gradient(linear, x, y) do
         error_variable = prediction_error(linear, x, y)
-
         [
           -2 * error_variable,
           -2 * error_variable * x
         ]
       end
 
-      defp gradient_descent_iteration(_, _, min_theta, _, _, iterations_with_no_improvement)
-           when iterations_with_no_improvement >= 100,
-           do: min_theta
+      defp gradient_descent_iteration(_, _, min_theta, _, _, no_improve_step) when no_improve_step >= 100, do: min_theta
 
-      defp gradient_descent_iteration(
-             theta,
-             alpha,
-             min_theta,
-             min_value,
-             data,
-             iterations_with_no_improvement
-           ) do
+      defp gradient_descent_iteration(theta, alpha, min_theta, min_value, data, no_improve_step) do
         [
           min_theta,
           min_value,
-          iterations_with_no_improvement,
+          no_improve_step,
           alpha
-        ] = check_value(data, min_value, theta, min_theta, iterations_with_no_improvement, alpha)
+        ] = check_value(data, min_value, theta, min_theta, no_improve_step, alpha)
 
-        theta =
-          data
-          |> Enum.shuffle()
-          |> Enum.reduce(theta, fn {xi, yi}, acc ->
-            gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
-            acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
-          end)
-
-        gradient_descent_iteration(
-          theta,
-          alpha,
-          min_theta,
-          min_value,
-          data,
-          iterations_with_no_improvement
-        )
+        calc_new_theta(data, theta, alpha)
+        |> gradient_descent_iteration(alpha, min_theta, min_value, data, no_improve_step)
       end
 
-      defp check_value(data, min_value, theta, min_theta, iterations_with_no_improvement, alpha) do
-        value =
-          Enum.reduce(data, 0, fn {xi, yi}, acc ->
-            acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
-          end)
+      defp calc_new_theta(data, theta, alpha) do
+        data
+        |> Enum.shuffle()
+        |> Enum.reduce(theta, fn {xi, yi}, acc ->
+          gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
+          acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
+        end)
+      end
 
+      defp check_value(data, min_value, theta, min_theta, no_improve_step, alpha) do
+        value = calc_new_value(data, theta)
         cond do
-          value < min_value ->
-            [theta, value, 0, 0.0001]
-
-          true ->
-            [min_theta, min_value, iterations_with_no_improvement + 1, alpha * 0.9]
+          value < min_value -> [theta, value, 0, 0.0001]
+          true -> [min_theta, min_value, no_improve_step + 1, alpha * 0.9]
         end
       end
+
+      defp calc_new_value(data, theta) do
+        Enum.reduce(data, 0, fn {xi, yi}, acc ->
+          acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
+        end)
+      end
     end
   end
 end

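Reading the refactored helpers together (my paraphrase of the code above, assuming `prediction_error/3` returns the residual of a straight-line prediction): for coefficients `theta = [a, b]` and a training pair `{x, y}`, `squared_error_gradient/3` returns the gradient of the squared residual, and `calc_new_theta/3` applies one stochastic pass over the shuffled data with learning rate `alpha`:

$$e = y - (a + b\,x), \qquad \nabla_\theta\, e^2 = \begin{bmatrix} -2e \\ -2e\,x \end{bmatrix}, \qquad \theta \leftarrow \theta - \alpha\, \nabla_\theta\, e^2$$

`check_value/6` then keeps the best `theta` seen so far, resets `alpha` to 0.0001 whenever the total squared error improves, decays it by 0.9 otherwise, and the recursion stops after 100 passes without improvement.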
lib/learn_kit/regression/polynomial.ex

Lines changed: 7 additions & 15 deletions
@@ -6,7 +6,6 @@ defmodule LearnKit.Regression.Polynomial do
   defstruct factors: [], results: [], coefficients: [], degree: 2
 
   alias LearnKit.Regression.Polynomial
-
   use Polynomial.Calculations
   use LearnKit.Regression.Score
 
@@ -29,12 +28,8 @@
       %LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2}
 
   """
-  @spec new(factors, results) :: %Polynomial{
-          factors: factors,
-          results: results,
-          coefficients: [],
-          degree: 2
-        }
+  @spec new(factors, results) :: %Polynomial{factors: factors, results: results, coefficients: [], degree: 2}
+
   def new(factors, results) when is_list(factors) and is_list(results) do
     %Polynomial{factors: factors, results: results}
   end
@@ -74,12 +69,8 @@
       }
 
   """
-  @spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{
-          factors: factors,
-          results: results,
-          coefficients: coefficients,
-          degree: degree
-        }
+  @spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
+
   def fit(%Polynomial{factors: factors, results: results}, options \\ []) do
     degree = options[:degree] || 2
     matrix = matrix(factors, degree)
@@ -103,8 +94,8 @@
 
   """
   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, list) :: {:ok, list}
-  def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples)
-      when is_list(samples) do
+
+  def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples) when is_list(samples) do
     {:ok, do_predict(polynomial, samples)}
   end
 
@@ -123,6 +114,7 @@
 
   """
   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, number) :: {:ok, number}
+
   def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do
     ordered_coefficients = coefficients |> Enum.reverse()
     {:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)}

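Putting the new module's pieces together, a sketch of how the 0.1.6 Polynomial Regression predictor appears to be used, based only on the `new/2`, `fit/2`, and `predict/2` clauses in this diff; the train data comes from the `@doc` example above, while `degree: 3` and the sample inputs are arbitrary illustrations (the degree defaults to 2).

```elixir
alias LearnKit.Regression.Polynomial

predictor =
  Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])  # train data from the @doc example
  |> Polynomial.fit(degree: 3)                  # fits coefficients for the given degree

# List clause returns {:ok, list}; number clause returns {:ok, number}
{:ok, predictions} = Polynomial.predict(predictor, [5, 6])
{:ok, prediction} = Polynomial.predict(predictor, 5)
```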
lib/learn_kit/regression/polynomial/calculations.ex

Lines changed: 0 additions & 2 deletions
@@ -14,7 +14,6 @@ defmodule LearnKit.Regression.Polynomial.Calculations do
 
       defp matrix_line(1, factors, degree) do
         power_ofs = Enum.to_list(1..degree)
-
         [Enum.count(factors) | sum_of_x_i_with_k(power_ofs, factors)]
       end
 
@@ -26,7 +25,6 @@ defmodule LearnKit.Regression.Polynomial.Calculations do
 
      defp matrix(factors, degree) do
        lines = Enum.to_list(1..(degree + 1))
-
        Enum.map(lines, fn line ->
          matrix_line(line, factors, degree)
        end)

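For context on the two hunks above (my reading of the visible code, not a statement from the changelog): with `n = Enum.count(factors)` and degree `d`, `matrix_line(1, factors, degree)` produces the first row of what looks like the normal-equations matrix of a degree-`d` least-squares fit,

$$\text{row}_1 = \bigl[\, n,\ \textstyle\sum_i x_i,\ \sum_i x_i^2,\ \dots,\ \sum_i x_i^d \,\bigr],$$

with the remaining rows (clauses outside this diff) presumably shifting those power sums, and `fit/2` in `polynomial.ex` solving the resulting system for the coefficients.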