Skip to content

Commit 272591d

Browse files
committed
dynaml-core: Added spearman correlation to RegressionMetrics
Signed-off-by: mandar2812 <[email protected]>
1 parent 2cc155c commit 272591d

File tree

4 files changed

+69
-53
lines changed

4 files changed

+69
-53
lines changed

build.sbt

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -90,30 +90,17 @@ lazy val DynaML = (project in file(".")).enablePlugins(JavaAppPackaging, BuildIn
9090
buildInfoKeys := Seq[BuildInfoKey](name, version, scalaVersion, sbtVersion),
9191
buildInfoPackage := "io.github.mandar2812.dynaml.repl",
9292
buildInfoUsePackageAsPath := true,
93-
mappings in Universal ++= Seq({
94-
// we are using the reference.conf as default application.conf
95-
// the user can override settings here
96-
val init = (resourceDirectory in Compile).value / "DynaMLInit.scala"
97-
init -> "conf/DynaMLInit.scala"
98-
}, {
99-
val banner = (resourceDirectory in Compile).value / "dynamlBanner.txt"
100-
banner -> "conf/banner.txt"
101-
}, {
102-
val zeppelin_env = (resourceDirectory in Compile).value / "zeppelin-site.xml"
103-
zeppelin_env -> "conf/zeppelin-site.xml"
104-
}, {
105-
val zeppelin_shiro = (resourceDirectory in Compile).value / "shiro.ini.template"
106-
zeppelin_shiro -> "conf/shiro.ini"
107-
}, {
108-
val zeppelinConf = (resourceDirectory in Compile).value / "interpreter-setting.json"
109-
zeppelinConf -> "lib/interpreter-setting.json"
110-
}, {
111-
val common = (resourceDirectory in Compile).value / "common.sh"
112-
common -> "bin/common.sh"
113-
}, {
114-
val intp = (resourceDirectory in Compile).value / "interpreter.sh"
115-
intp -> "bin/interpreter.sh"
116-
}),
93+
mappings in Universal ++= Seq(
94+
{
95+
//Initialization script for the DynaML REPL
96+
val init = (resourceDirectory in Compile).value / "DynaMLInit.scala"
97+
init -> "conf/DynaMLInit.scala"
98+
},
99+
{
100+
val banner = (resourceDirectory in Compile).value / "dynamlBanner.txt"
101+
banner -> "conf/banner.txt"
102+
}
103+
),
117104
javaOptions in Universal ++= Seq(
118105
// -J params will be added as jvm parameters
119106
"-J-Xmx2048m",

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/evaluation/RegressionMetrics.scala

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ import io.github.mandar2812.dynaml.algebra.square
3434

3535
class RegressionMetrics(
3636
override protected val scoresAndLabels: List[(Double, Double)],
37-
val len: Int)
38-
extends Metrics[Double] {
39-
private val logger = Logger.getLogger(this.getClass)
37+
val len: Int) extends Metrics[Double] {
4038

4139
val length: Int = len
4240

@@ -54,6 +52,8 @@ class RegressionMetrics(
5452

5553
val corr: Double = RegressionMetrics.computeCorr(scoresAndLabels, length)
5654

55+
val sp_corr: Double = RegressionMetrics.computeSpearmanCorr(scoresAndLabels, length)
56+
5757
val predictionEfficiency = scoresAndLabels.map((p) =>
5858
math.pow(p._1 - p._2, 2)/length).sum
5959

@@ -85,14 +85,17 @@ class RegressionMetrics(
8585
scala.Predef.print("Corr. Coefficient = ")
8686
pprint.pprintln(corr)
8787

88+
scala.Predef.print("Spearman Corr. Coefficient = ")
89+
pprint.pprintln(sp_corr)
90+
8891
scala.Predef.print("Model Yield = ")
8992
pprint.pprintln(modelYield)
9093

9194
scala.Predef.print("Std Dev of Residuals = ")
9295
pprint.pprintln(sigma)
9396
}
9497

95-
override def kpi() = DenseVector(mae, rmse, Rsq, corr)
98+
override def kpi() = DenseVector(mae, rmse, Rsq, corr, sp_corr)
9699

97100
override def generatePlots(): Unit = {
98101
println("Generating Plot of Residuals")
@@ -130,7 +133,7 @@ object RegressionMetrics {
130133
val mean: Double = scoresAndLabels.map{coup => coup._2}.sum/size
131134
var SSres = 0.0
132135
var SStot = 0.0
133-
scoresAndLabels.foreach((couple) => {
136+
scoresAndLabels.foreach(couple => {
134137
SSres += math.pow(couple._2 - couple._1, 2)
135138
SStot += math.pow(couple._2 - mean, 2)
136139
})
@@ -144,7 +147,7 @@ object RegressionMetrics {
144147
var SSLabel = 0.0
145148
var SSPred = 0.0
146149
var SSLabelPred = 0.0
147-
scoresAndLabels.foreach((couple) => {
150+
scoresAndLabels.foreach(couple => {
148151
SSLabel += math.pow(couple._2 - meanLabel, 2)
149152
SSPred += math.pow(couple._1 - meanScore, 2)
150153
SSLabelPred += (couple._1 - meanScore)*(couple._2 - meanLabel)
@@ -153,16 +156,39 @@ object RegressionMetrics {
153156
SSLabelPred/(math.sqrt(SSPred)*math.sqrt(SSLabel))
154157
}
155158

159+
/**
  * Compute the Spearman rank correlation between predictions and targets.
  *
  * Each series is converted to ranks (1-based, ascending), with tied values
  * receiving the average of the rank positions they span; the Pearson
  * correlation of the two rank series is then returned via [[computeCorr]].
  *
  * @param scoresAndLabels collection of (prediction, target) pairs
  * @param size number of pairs, forwarded to the underlying correlation
  * @return the Spearman rank correlation coefficient
  */
def computeSpearmanCorr(scoresAndLabels: Iterable[(Double, Double)], size: Int): Double = {

  val (predictions, targets) = scoresAndLabels.toSeq.unzip

  // Map each distinct value to its average (tie-adjusted) 1-based rank.
  def averageRanks(values: Seq[Double]): Map[Double, Double] = {
    val positioned = values.sorted.zipWithIndex.map { case (v, idx) => (v, idx + 1) }
    positioned
      .groupBy { case (v, _) => v }
      .map { case (v, group) =>
        val positions = group.map { case (_, rank) => rank }
        // Ties share the mean of the rank positions they occupy.
        v -> positions.sum.toDouble / positions.length
      }
  }

  val predRanks   = averageRanks(predictions)
  val targetRanks = averageRanks(targets)

  val rankPairs = predictions.map(predRanks).zip(targets.map(targetRanks))

  // Spearman = Pearson correlation applied to the rank series.
  computeCorr(rankPairs, size)
}
181+
156182
def computeYield(scoresAndLabels: Iterable[(Double, Double)], size: Int): Double =
157183
(scoresAndLabels.map(_._1).max - scoresAndLabels.map(_._1).min)/
158184
(scoresAndLabels.map(_._2).max - scoresAndLabels.map(_._2).min)
159185

160186
}
161187

162-
class MultiRegressionMetrics(override protected val scoresAndLabels: List[(DenseVector[Double], DenseVector[Double])],
163-
val len: Int)
164-
extends Metrics[DenseVector[Double]] {
165-
private val logger = Logger.getLogger(this.getClass)
188+
class MultiRegressionMetrics(
189+
override protected val scoresAndLabels: List[(DenseVector[Double], DenseVector[Double])],
190+
val len: Int) extends Metrics[DenseVector[Double]] {
191+
166192

167193
val num_outputs: Int = scoresAndLabels.head._2.length
168194

dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/models/gp/GPNarXModel.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,13 @@ import io.github.mandar2812.dynaml.pipes.DataPipe
3535
* f(x) ~ GP(0, cov(X,X))
3636
* e|f(x) ~ N(0, noise(X,X))
3737
*/
38-
class GPNarXModel(order: Int,
39-
ex: Int,
40-
cov: LocalScalarKernel[DenseVector[Double]],
41-
nL: LocalScalarKernel[DenseVector[Double]],
42-
trainingdata: Seq[(DenseVector[Double], Double)],
43-
meanFunc: DataPipe[DenseVector[Double], Double] = DataPipe(_ => 0.0)) extends
38+
class GPNarXModel(
39+
order: Int,
40+
ex: Int,
41+
cov: LocalScalarKernel[DenseVector[Double]],
42+
nL: LocalScalarKernel[DenseVector[Double]],
43+
trainingdata: Seq[(DenseVector[Double], Double)],
44+
meanFunc: DataPipe[DenseVector[Double], Double] = DataPipe(_ => 0.0)) extends
4445
GPRegression(cov, nL, trainingdata, meanFunc) {
4546

4647
val modelOrder = order

dynaml-examples/src/main/scala-2.11/io/github/mandar2812/dynaml/examples/AbottPowerPlant.scala

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ import com.quantifind.charts.Highcharts._
2323
import io.github.mandar2812.dynaml.DynaMLPipe._
2424
import io.github.mandar2812.dynaml.evaluation.{MultiRegressionMetrics, RegressionMetrics}
2525
import io.github.mandar2812.dynaml.kernels.LocalScalarKernel
26-
import io.github.mandar2812.dynaml.models.gp.GPNarXModel
26+
import io.github.mandar2812.dynaml.models.gp.{AbstractGPRegressionModel, GPNarXModel, GPRegression}
2727
import io.github.mandar2812.dynaml.models.stp.MVStudentsTModel
28-
import io.github.mandar2812.dynaml.optimization.{GradBasedGlobalOptimizer, GridSearch, ProbGPCommMachine}
28+
import io.github.mandar2812.dynaml.optimization.{GlobalOptimizer, GradBasedGlobalOptimizer, GridSearch, ProbGPCommMachine}
2929
import io.github.mandar2812.dynaml.pipes.{DataPipe, StreamDataPipe}
3030
import io.github.mandar2812.dynaml.utils.GaussianScaler
3131

@@ -39,6 +39,7 @@ object AbottPowerPlant {
3939
type Features = DenseVector[Double]
4040
type Kernel = LocalScalarKernel[Features]
4141
type Scales = (GaussianScaler, GaussianScaler)
42+
type GPModel = AbstractGPRegressionModel[Seq[(DenseVector[Double], Double)], DenseVector[Double]]
4243

4344
type Data = Stream[(Features, Features)]
4445

@@ -172,20 +173,18 @@ object AbottPowerPlant {
172173
//create RegressionMetrics instance and produce plots
173174

174175
val modelTrainTest =
175-
(trainTest: ((Stream[(Features, Double)],
176-
Stream[(Features, Double)]),
177-
(Features, Features))) => {
176+
(trainTest: ((Stream[(Features, Double)], Stream[(Features, Double)]), (Features, Features))) => {
178177

179178
val model = new GPNarXModel(deltaT, 4, kernel,
180179
noise, trainTest._1._1)
181180

182-
val gs = opt("globalOpt") match {
183-
case "GS" => new GridSearch[model.type](model)
181+
val gs: GlobalOptimizer[GPModel] = opt("globalOpt") match {
182+
case "GS" => new GridSearch[GPModel](model)
184183
.setGridSize(opt("grid").toInt)
185184
.setStepSize(opt("step").toDouble)
186185
.setLogScale(false)
187186

188-
case "ML" => new GradBasedGlobalOptimizer[model.type](model)
187+
case "ML" => new GradBasedGlobalOptimizer(model)
189188

190189
case "GPC" => new ProbGPCommMachine(model)
191190
.setPolicy(opt("policy"))
@@ -198,7 +197,7 @@ object AbottPowerPlant {
198197

199198
val startConf = kernel.effective_state ++ noise.effective_state
200199

201-
val (optModel, _) = gs.optimize(startConf, opt)
200+
val (optModel, _): (GPModel, Map[String, Double]) = gs.optimize(startConf, opt)
202201

203202
val res = optModel.test(trainTest._1._2)
204203

@@ -209,13 +208,16 @@ object AbottPowerPlant {
209208
l._4*trainTest._2._2(-1) + trainTest._2._1(-1))})
210209

211210
val scoresAndLabelsPipe =
212-
DataPipe(
213-
(res: Seq[(Features, Double, Double, Double, Double)]) =>
214-
res.map(i => (i._3, i._2, i._4, i._5)).toList) > deNormalize
211+
DataPipe[
212+
Seq[(Features, Double, Double, Double, Double)],
213+
List[(Double, Double, Double, Double)]](
214+
_.map(i => (i._3, i._2, i._4, i._5)).toList) >
215+
deNormalize
215216

216217
val scoresAndLabels = scoresAndLabelsPipe.run(res.toList)
217218

218-
val metrics = new RegressionMetrics(scoresAndLabels.map(i => (i._1, i._2)),
219+
val metrics = new RegressionMetrics(
220+
scoresAndLabels.map(i => (i._1, i._2)),
219221
scoresAndLabels.length)
220222

221223
val (name, name1) =

0 commit comments

Comments
 (0)