package org.steelhawks.util;

import Jama.Matrix;
import Jama.QRDecomposition;

// NOTE: This file is available at
// http://algs4.cs.princeton.edu/14analysis/PolynomialRegression.java.html
8+
/**
 * The {@code PolynomialRegression} class performs a polynomial regression on a set of <em>N</em>
 * data points (<em>y<sub>i</sub></em>, <em>x<sub>i</sub></em>). That is, it fits a polynomial
 * <em>y</em> = β<sub>0</sub> + β<sub>1</sub> <em>x</em> + β<sub>2</sub>
 * <em>x</em><sup>2</sup> + ... + β<sub><em>d</em></sub> <em>x</em><sup><em>d</em></sup> (where
 * <em>y</em> is the response variable, <em>x</em> is the predictor variable, and the
 * β<sub><em>i</em></sub> are the regression coefficients) that minimizes the sum of squared
 * residuals of the multiple regression model. It also computes the associated coefficient of
 * determination <em>R</em><sup>2</sup>.
 *
 * <p>This implementation performs a QR-decomposition of the underlying Vandermonde matrix, so it is
 * neither the fastest nor the most numerically stable way to perform the polynomial regression.
 *
 * @author Robert Sedgewick
 * @author Kevin Wayne
 */
25+ public class PolynomialRegression implements Comparable <PolynomialRegression > {
26+ private final String variableName ; // name of the predictor variable
27+ private int degree ; // degree of the polynomial regression
28+ private Matrix beta ; // the polynomial regression coefficients
29+ private double sse ; // sum of squares due to error
30+ private double sst ; // total sum of squares
31+
32+ /**
33+ * Performs a polynomial reggression on the data points {@code (y[i], x[i])}. Uses n as the name
34+ * of the predictor variable.
35+ *
36+ * @param x the values of the predictor variable
37+ * @param y the corresponding values of the response variable
38+ * @param degree the degree of the polynomial to fit
39+ * @throws IllegalArgumentException if the lengths of the two arrays are not equal
40+ */
41+ public PolynomialRegression (double [] x , double [] y , int degree ) {
42+ this (x , y , degree , "n" );
43+ }
44+
45+ /**
46+ * Performs a polynomial reggression on the data points {@code (y[i], x[i])}.
47+ *
48+ * @param x the values of the predictor variable
49+ * @param y the corresponding values of the response variable
50+ * @param degree the degree of the polynomial to fit
51+ * @param variableName the name of the predictor variable
52+ * @throws IllegalArgumentException if the lengths of the two arrays are not equal
53+ */
54+ public PolynomialRegression (double [] x , double [] y , int degree , String variableName ) {
55+ this .degree = degree ;
56+ this .variableName = variableName ;
57+
58+ int n = x .length ;
59+ QRDecomposition qr = null ;
60+ Matrix matrixX = null ;
61+
62+ // in case Vandermonde matrix does not have full rank, reduce degree until it
63+ // does
64+ while (true ) {
65+
66+ // build Vandermonde matrix
67+ double [][] vandermonde = new double [n ][this .degree + 1 ];
68+ for (int i = 0 ; i < n ; i ++) {
69+ for (int j = 0 ; j <= this .degree ; j ++) {
70+ vandermonde [i ][j ] = Math .pow (x [i ], j );
71+ }
72+ }
73+ matrixX = new Matrix (vandermonde );
74+
75+ // find least squares solution
76+ qr = new QRDecomposition (matrixX );
77+ if (qr .isFullRank ()) break ;
78+
79+ // decrease degree and try again
80+ this .degree --;
81+ }
82+
83+ // create matrix from vector
84+ Matrix matrixY = new Matrix (y , n );
85+
86+ // linear regression coefficients
87+ beta = qr .solve (matrixY );
88+
89+ // mean of y[] values
90+ double sum = 0.0 ;
91+ for (int i = 0 ; i < n ; i ++) sum += y [i ];
92+ double mean = sum / n ;
93+
94+ // total variation to be accounted for
95+ for (int i = 0 ; i < n ; i ++) {
96+ double dev = y [i ] - mean ;
97+ sst += dev * dev ;
98+ }
99+
100+ // variation not accounted for
101+ Matrix residuals = matrixX .times (beta ).minus (matrixY );
102+ sse = residuals .norm2 () * residuals .norm2 ();
103+ }
104+
105+ /**
106+ * Returns the {@code j}th regression coefficient.
107+ *
108+ * @param j the index
109+ * @return the {@code j}th regression coefficient
110+ */
111+ public double beta (int j ) {
112+ // to make -0.0 print as 0.0
113+ if (Math .abs (beta .get (j , 0 )) < 1E-4 ) return 0.0 ;
114+ return beta .get (j , 0 );
115+ }
116+
117+ /**
118+ * Returns the degree of the polynomial to fit.
119+ *
120+ * @return the degree of the polynomial to fit
121+ */
122+ public int degree () {
123+ return degree ;
124+ }
125+
126+ /**
127+ * Returns the coefficient of determination <em>R</em><sup>2</sup>.
128+ *
129+ * @return the coefficient of determination <em>R</em><sup>2</sup>, which is a real number between
130+ * 0 and 1
131+ */
132+ public double R2 () {
133+ if (sst == 0.0 ) return 1.0 ; // constant function
134+ return 1.0 - sse / sst ;
135+ }
136+
137+ /**
138+ * Returns the expected response {@code y} given the value of the predictor variable {@code x}.
139+ *
140+ * @param x the value of the predictor variable
141+ * @return the expected response {@code y} given the value of the predictor variable {@code x}
142+ */
143+ public double predict (double x ) {
144+ // horner's method
145+ double y = 0.0 ;
146+ for (int j = degree ; j >= 0 ; j --) y = beta (j ) + (x * y );
147+ return y ;
148+ }
149+
150+ /**
151+ * Returns a string representation of the polynomial regression model.
152+ *
153+ * @return a string representation of the polynomial regression model, including the best-fit
154+ * polynomial and the coefficient of determination <em>R</em><sup>2</sup>
155+ */
156+ public String toString () {
157+ StringBuilder s = new StringBuilder ();
158+ int j = degree ;
159+
160+ // ignoring leading zero coefficients
161+ while (j >= 0 && Math .abs (beta (j )) < 1E-5 ) j --;
162+
163+ // create remaining terms
164+ while (j >= 0 ) {
165+ if (j == 0 ) s .append (String .format ("%.10f " , beta (j )));
166+ else if (j == 1 ) s .append (String .format ("%.10f %s + " , beta (j ), variableName ));
167+ else s .append (String .format ("%.10f %s^%d + " , beta (j ), variableName , j ));
168+ j --;
169+ }
170+ s = s .append (" (R^2 = " + String .format ("%.3f" , R2 ()) + ")" );
171+
172+ // replace "+ -2n" with "- 2n"
173+ return s .toString ().replace ("+ -" , "- " );
174+ }
175+
176+ /** Compare lexicographically. */
177+ public int compareTo (PolynomialRegression that ) {
178+ double EPSILON = 1E-5 ;
179+ int maxDegree = Math .max (this .degree (), that .degree ());
180+ for (int j = maxDegree ; j >= 0 ; j --) {
181+ double term1 = 0.0 ;
182+ double term2 = 0.0 ;
183+ if (this .degree () >= j ) term1 = this .beta (j );
184+ if (that .degree () >= j ) term2 = that .beta (j );
185+ if (Math .abs (term1 ) < EPSILON ) term1 = 0.0 ;
186+ if (Math .abs (term2 ) < EPSILON ) term2 = 0.0 ;
187+ if (term1 < term2 ) return -1 ;
188+ else if (term1 > term2 ) return +1 ;
189+ }
190+ return 0 ;
191+ }
192+
193+ /**
194+ * Unit tests the {@code PolynomialRegression} data type.
195+ *
196+ * @param args the command-line arguments
197+ */
198+ public static void main (String [] args ) {
199+ double [] x = {10 , 20 , 40 , 80 , 160 , 200 };
200+ double [] y = {100 , 350 , 1500 , 6700 , 20160 , 40000 };
201+ PolynomialRegression regression = new PolynomialRegression (x , y , 3 );
202+
203+ System .out .println (regression );
204+ }
205+ }