39
39
@inherit_doc
40
40
class LinearRegression (JavaEstimator , HasFeaturesCol , HasLabelCol , HasPredictionCol , HasMaxIter ,
41
41
HasRegParam , HasTol , HasElasticNetParam , HasFitIntercept ,
42
- HasStandardization , HasSolver , HasWeightCol , HasAggregationDepth ,
42
+ HasStandardization , HasSolver , HasWeightCol , HasAggregationDepth , HasLoss ,
43
43
JavaMLWritable , JavaMLReadable ):
44
44
"""
45
45
Linear regression.
46
46
47
- The learning objective is to minimize the squared error , with regularization.
48
- The specific squared error loss function used is: L = 1/2n ||A coefficients - y||^2^
47
+ The learning objective is to minimize the specified loss function , with regularization.
48
+ This supports two kinds of loss:
49
49
50
- This supports multiple types of regularization:
51
-
52
- * none (a.k.a. ordinary least squares )
50
+ * squaredError (a.k.a. squared loss)
51
+ * huber (a hybrid of squared error for relatively small errors and absolute error for \
52
+ relatively large ones, and we estimate the scale parameter from training data )
53
53
54
- * L2 (ridge regression)
54
+ This supports multiple types of regularization:
55
55
56
- * L1 (Lasso)
56
+ * none (a.k.a. ordinary least squares)
57
+ * L2 (ridge regression)
58
+ * L1 (Lasso)
59
+ * L2 + L1 (elastic net)
57
60
58
- * L2 + L1 (elastic net)
61
+ Note: Fitting with huber loss only supports none and L2 regularization.
59
62
60
63
>>> from pyspark.ml.linalg import Vectors
61
64
>>> df = spark.createDataFrame([
@@ -98,31 +101,42 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
98
101
solver = Param (Params ._dummy (), "solver" , "The solver algorithm for optimization. Supported " +
99
102
"options: auto, normal, l-bfgs." , typeConverter = TypeConverters .toString )
100
103
104
+ loss = Param (Params ._dummy (), "loss" , "The loss function to be optimized. Supported " +
105
+ "options: squaredError, huber." , typeConverter = TypeConverters .toString )
106
+
107
+ epsilon = Param (Params ._dummy (), "epsilon" , "The shape parameter to control the amount of " +
108
+ "robustness. Must be > 1.0. Only valid when loss is huber" ,
109
+ typeConverter = TypeConverters .toFloat )
110
+
101
111
@keyword_only
102
112
def __init__ (self , featuresCol = "features" , labelCol = "label" , predictionCol = "prediction" ,
103
113
maxIter = 100 , regParam = 0.0 , elasticNetParam = 0.0 , tol = 1e-6 , fitIntercept = True ,
104
- standardization = True , solver = "auto" , weightCol = None , aggregationDepth = 2 ):
114
+ standardization = True , solver = "auto" , weightCol = None , aggregationDepth = 2 ,
115
+ loss = "squaredError" , epsilon = 1.35 ):
105
116
"""
106
117
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
107
118
maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
108
- standardization=True, solver="auto", weightCol=None, aggregationDepth=2)
119
+ standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \
120
+ loss="squaredError", epsilon=1.35)
109
121
"""
110
122
super (LinearRegression , self ).__init__ ()
111
123
self ._java_obj = self ._new_java_obj (
112
124
"org.apache.spark.ml.regression.LinearRegression" , self .uid )
113
- self ._setDefault (maxIter = 100 , regParam = 0.0 , tol = 1e-6 )
125
+ self ._setDefault (maxIter = 100 , regParam = 0.0 , tol = 1e-6 , loss = "squaredError" , epsilon = 1.35 )
114
126
kwargs = self ._input_kwargs
115
127
self .setParams (** kwargs )
116
128
117
129
@keyword_only
118
130
@since ("1.4.0" )
119
131
def setParams (self , featuresCol = "features" , labelCol = "label" , predictionCol = "prediction" ,
120
132
maxIter = 100 , regParam = 0.0 , elasticNetParam = 0.0 , tol = 1e-6 , fitIntercept = True ,
121
- standardization = True , solver = "auto" , weightCol = None , aggregationDepth = 2 ):
133
+ standardization = True , solver = "auto" , weightCol = None , aggregationDepth = 2 ,
134
+ loss = "squaredError" , epsilon = 1.35 ):
122
135
"""
123
136
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
124
137
maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
125
- standardization=True, solver="auto", weightCol=None, aggregationDepth=2)
138
+ standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \
139
+ loss="squaredError", epsilon=1.35)
126
140
Sets params for linear regression.
127
141
"""
128
142
kwargs = self ._input_kwargs
@@ -131,6 +145,20 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
131
145
def _create_model (self , java_model ):
132
146
return LinearRegressionModel (java_model )
133
147
148
+ @since ("2.3.0" )
149
+ def setEpsilon (self , value ):
150
+ """
151
+ Sets the value of :py:attr:`epsilon`.
152
+ """
153
+ return self ._set (epsilon = value )
154
+
155
+ @since ("2.3.0" )
156
+ def getEpsilon (self ):
157
+ """
158
+ Gets the value of epsilon or its default value.
159
+ """
160
+ return self .getOrDefault (self .epsilon )
161
+
134
162
135
163
class LinearRegressionModel (JavaModel , JavaPredictionModel , JavaMLWritable , JavaMLReadable ):
136
164
"""
@@ -155,6 +183,14 @@ def intercept(self):
155
183
"""
156
184
return self ._call_java ("intercept" )
157
185
186
+ @property
187
+ @since ("2.3.0" )
188
+ def scale (self ):
189
+ """
190
+ The value by which \|y - X'w\| is scaled down when loss is "huber", otherwise 1.0.
191
+ """
192
+ return self ._call_java ("scale" )
193
+
158
194
@property
159
195
@since ("2.0.0" )
160
196
def summary (self ):
0 commit comments