@@ -25,8 +25,6 @@ import com.github.tototoshi.csv.{CSVReader, DefaultCSVFormat, QUOTE_NONNUMERIC}
 import org.renjin.script.{RenjinScriptEngine, RenjinScriptEngineFactory}
 import org.renjin.sexp.SEXP
 import spire.algebra.InnerProductSpace
-// import org.apache.spark.mllib.regression.LabeledPoint
-// import org.apache.spark.rdd.RDD
 
 import scala.io.Source
 import scala.reflect.runtime.{universe => ru}
@@ -37,7 +35,6 @@ import java.net.URL
 
 import breeze.stats.distributions.ContinuousDistr
 import io.github.mandar2812.dynaml.algebra.PartitionedMatrix
-// import org.apache.spark.annotation.Experimental
 
 import scalaxy.streams.optimize
 import spire.algebra.{Eq, Field}
@@ -169,28 +166,6 @@ package object utils {
     (mean, biasedSigmaSq*adjustment)
   }
 
-  /* @Experimental
-  def getStatsRDD(data: RDD[LabeledPoint]):
-  (Double, Double,
-    DenseVector[Double],
-    DenseMatrix[Double]) = {
-    val (lm, ls, m, s) = data.map((p) => {
-      val label = p.label
-      val features = DenseVector(p.features.toArray)
-      (label, label*label, features, features*features.t)
-    }).reduce((a,b) => {
-      (a._1 + b._1, a._2 + b._2, a._3 + b._3, a._4 + b._4)
-    })
-    val count = data.count().toDouble
-    val labelMean = lm/count
-    val labelVar = (ls/count) - labelMean*labelMean
-    m :/= count
-    s :/= count
-    val featuresCov = s - m*m.t
-
-    (labelMean, labelVar, m, featuresCov)
-  }*/
-
   def getMinMax(data: List[DenseVector[Double]]):
   (DenseVector[Double], DenseVector[Double]) = {
     @tailrec