Switch Compute.scala device according to JMH parameter

Atry · Atry · commit 9cd6dcb7209b · 2018-03-20T17:24:51.000+08:00
diff --git a/benchmarks/src/jmh/scala/com/thoughtworks/compute/benchmarks.scala b/benchmarks/src/jmh/scala/com/thoughtworks/compute/benchmarks.scala
@@ -8,7 +8,7 @@ import com.thoughtworks.raii.asynchronous._
 import com.thoughtworks.raii.covariant._
 import com.thoughtworks.tryt.covariant._
 import com.typesafe.scalalogging.StrictLogging
-import org.lwjgl.opencl.CLCapabilities
+import org.lwjgl.opencl.{CL10, CLCapabilities}
 import org.lwjgl.system.Configuration
 import org.nd4j.linalg.api.ndarray.INDArray
 import org.nd4j.linalg.convolution.Convolution
@@ -23,6 +23,28 @@ import scala.util.Try
 
 object benchmarks {
 
+  trait TensorState { // Mix-in for tensor benchmark states: adds a device-type parameter and a shared trait stack.
+    @Param(Array("CPU", "GPU"))
+    protected var deviceType: String = _ // No explicit initial value; presumably injected by JMH from @Param (confirm).
+    // Common OpenCL/Tensors mix-ins shared by the tensor benchmarks; selects its devices from `deviceType`.
+    trait BenchmarkTensors
+        extends StrictLogging
+        with Tensors.UnsafeMathOptimizations
+        with Tensors.SuppressWarnings
+        with OpenCL.LogContextNotification
+        with OpenCL.GlobalExecutionContext
+        with OpenCL.UseFirstPlatform
+        with OpenCL.CommandQueuePool
+        with OpenCL.DontReleaseEventTooEarly
+        with Tensors.WangHashingRandomNumberGenerator {
+      // Reflectively reads the static field CL10.CL_DEVICE_TYPE_<deviceType> as an Int; lazy, so `deviceType` is read on first use.
+      @transient
+      protected lazy val deviceIds: Seq[DeviceId] = {
+        deviceIdsByType(classOf[CL10].getField(s"CL_DEVICE_TYPE_$deviceType").get(null).asInstanceOf[Int])
+      }
+    }
+  }
+
   @Threads(value = Threads.MAX)
   @State(Scope.Benchmark)
   class Nd4jTanh extends TanhState {
@@ -47,18 +69,8 @@ object benchmarks {
 
   @Threads(value = Threads.MAX)
   @State(Scope.Benchmark)
-  class TensorTanh extends TanhState {
-    trait Benchmarks
-        extends StrictLogging
-        with Tensors.UnsafeMathOptimizations
-        with Tensors.SuppressWarnings
-        with OpenCL.LogContextNotification
-        with OpenCL.GlobalExecutionContext
-        with OpenCL.UseAllCpuDevices
-        with OpenCL.UseFirstPlatform
-        with OpenCL.CommandQueuePool
-        with OpenCL.DontReleaseEventTooEarly
-        with Tensors.WangHashingRandomNumberGenerator {
+  class TensorTanh extends TanhState with TensorState {
+    trait Benchmarks extends BenchmarkTensors {
 
       protected val numberOfCommandQueuesPerDevice: Int = 2
 
@@ -130,17 +142,8 @@ object benchmarks {
 
   @Threads(value = Threads.MAX)
   @State(Scope.Benchmark)
-  class TensorSum extends SumState {
-    trait Benchmarks
-        extends StrictLogging
-        with Tensors.UnsafeMathOptimizations
-        with OpenCL.LogContextNotification
-        with OpenCL.GlobalExecutionContext
-        with OpenCL.UseAllCpuDevices
-        with OpenCL.UseFirstPlatform
-        with OpenCL.CommandQueuePool
-        with OpenCL.DontReleaseEventTooEarly
-        with Tensors.WangHashingRandomNumberGenerator {
+  class TensorSum extends SumState with TensorState {
+    trait Benchmarks extends BenchmarkTensors {
 
       protected val numberOfCommandQueuesPerDevice: Int = 2
 
@@ -200,17 +203,8 @@ object benchmarks {
 
   @Threads(value = Threads.MAX)
   @State(Scope.Benchmark)
-  class TensorRandomNormal extends RandomNormalState {
-    trait Benchmarks
-        extends StrictLogging
-        with Tensors.UnsafeMathOptimizations
-        with OpenCL.LogContextNotification
-        with OpenCL.GlobalExecutionContext
-        with OpenCL.UseAllCpuDevices
-        with OpenCL.UseFirstPlatform
-        with OpenCL.CommandQueuePool
-        with OpenCL.DontReleaseEventTooEarly
-        with Tensors.WangHashingRandomNumberGenerator {
+  class TensorRandomNormal extends RandomNormalState with TensorState {
+    trait Benchmarks extends BenchmarkTensors {
 
       protected val numberOfCommandQueuesPerDevice: Int = 2
 
@@ -295,19 +289,9 @@ object benchmarks {
 
   @Threads(value = Threads.MAX)
   @State(Scope.Benchmark)
-  class TensorConvolution extends ConvolutionState {
+  class TensorConvolution extends ConvolutionState with TensorState {
 
-    trait Benchmarks
-        extends StrictLogging
-        with Tensors.UnsafeMathOptimizations
-        with OpenCL.LogContextNotification
-        with OpenCL.GlobalExecutionContext
-        with OpenCL.UseAllCpuDevices
-        with OpenCL.UseFirstPlatform
-        with OpenCL.CommandQueuePool
-        with OpenCL.DontReleaseEventTooEarly
-        with Tensors.WangHashingRandomNumberGenerator
-        with ConvolutionTensors {
+    trait Benchmarks extends BenchmarkTensors with ConvolutionTensors {
 
       protected val numberOfCommandQueuesPerDevice = 2