Skip to content

Commit d0f4506

Browse files
committed
Benchmark mode with OCL/PTX metrics profiling enabled
1 parent d3f1ea3 commit d0f4506

File tree

32 files changed

+365
-255
lines changed

32 files changed

+365
-255
lines changed

assembly/src/docs/9_PROFILER.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# TornadoVM Profiler
22

3-
To enable the TornadoVM profiler use `-Dtornado.profiler=True`.
3+
To enable the TornadoVM profiler, use the Java flag `-Dtornado.profiler=True`.
44

55
Example:
66

benchmarks/src/main/java/uk/ac/manchester/tornado/benchmarks/BenchmarkDriver.java

Lines changed: 99 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
import static java.util.Arrays.sort;
2222
import static uk.ac.manchester.tornado.api.utils.TornadoUtilities.humanReadableByteCount;
2323

24+
import java.util.ArrayList;
25+
import java.util.List;
26+
27+
import uk.ac.manchester.tornado.api.TaskSchedule;
2428
import uk.ac.manchester.tornado.api.common.TornadoDevice;
2529
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
2630

@@ -36,8 +40,15 @@ public abstract class BenchmarkDriver {
3640
private double elapsed;
3741
private boolean validResult;
3842
private double[] timers;
43+
44+
private List<Long> deviceKernelTimers;
45+
private List<Long> deviceCopyIn;
46+
private List<Long> deviceCopyOut;
47+
3948
private int startingIndex = 30;
4049

50+
protected TaskSchedule ts;
51+
4152
public BenchmarkDriver(long iterations) {
4253
this.iterations = iterations;
4354
}
@@ -58,6 +69,10 @@ public void tearDown() {
5869

5970
public abstract void benchmarkMethod(TornadoDevice device);
6071

72+
public TaskSchedule getTaskSchedule() {
73+
return ts;
74+
}
75+
6176
protected void barrier() {
6277

6378
}
@@ -74,7 +89,7 @@ private boolean skipGC() {
7489
return true;
7590
}
7691

77-
public void benchmark(TornadoDevice device) {
92+
public void benchmark(TornadoDevice device, boolean isProfilerEnabled) {
7893

7994
setUp();
8095

@@ -84,6 +99,12 @@ public void benchmark(TornadoDevice device) {
8499

85100
timers = new double[size];
86101

102+
if (isProfilerEnabled) {
103+
deviceKernelTimers = new ArrayList<>();
104+
deviceCopyIn = new ArrayList<>();
105+
deviceCopyOut = new ArrayList<>();
106+
}
107+
87108
if (validResult) {
88109
for (long i = 0; i < iterations; i++) {
89110
if (!skipGC()) {
@@ -92,29 +113,40 @@ public void benchmark(TornadoDevice device) {
92113
final long start = System.nanoTime();
93114
benchmarkMethod(device);
94115
final long end = System.nanoTime();
116+
117+
if (isProfilerEnabled) {
118+
119+
// A zero timing is treated as an invalid sample, so only non-zero values are recorded and counted towards the general stats.
120+
if (getTaskSchedule().getDeviceKernelTime() != 0) {
121+
deviceKernelTimers.add(getTaskSchedule().getDeviceKernelTime());
122+
}
123+
if (getTaskSchedule().getDeviceWriteTime() != 0) {
124+
deviceCopyIn.add(getTaskSchedule().getDeviceWriteTime());
125+
}
126+
if (getTaskSchedule().getDeviceReadTime() != 0) {
127+
deviceCopyOut.add(getTaskSchedule().getDeviceReadTime());
128+
}
129+
}
130+
95131
timers[toIntExact(i)] = (end - start);
96132
}
97133
barrier();
98134
}
99135
tearDown();
100136
}
101137

102-
public double getBestExecution() {
103-
double minValue = timers[0];
104-
for (int i = 1; i < timers.length; i++) {
105-
if (timers[i] < minValue) {
106-
minValue = timers[i];
138+
public double getMin(double[] arr) {
139+
double minValue = arr[0];
140+
for (int i = 1; i < arr.length; i++) {
141+
if (arr[i] < minValue) {
142+
minValue = arr[i];
107143
}
108144
}
109145
return minValue;
110146
}
111147

112-
public double getFirstIteration() {
113-
return timers[0];
114-
}
115-
116-
public double getMedian() {
117-
double[] temp = timers.clone();
148+
public double getMedian(double[] arr) {
149+
double[] temp = arr.clone();
118150
sort(temp);
119151
if (temp.length % 2 == 0) {
120152
return ((temp[temp.length / 2] + temp[temp.length / 2 - 1]) / 2);
@@ -123,19 +155,67 @@ public double getMedian() {
123155
}
124156
}
125157

126-
public double getMean() {
158+
public double[] toArray(List<Long> list) {
159+
return list.stream().mapToDouble(i -> i).toArray();
160+
}
161+
162+
public double getBestKernelTime() {
163+
return getMin(toArray(deviceKernelTimers));
164+
}
165+
166+
public double getMedianKernelTime() {
167+
return getMedian(toArray(deviceKernelTimers));
168+
}
169+
170+
public double getAverageKernelTime() {
171+
return getAverage(toArray(deviceKernelTimers));
172+
}
173+
174+
public double getAverageCopyInTime() {
175+
return getAverage(toArray(deviceCopyIn));
176+
}
177+
178+
public double getAverageCopyOutTime() {
179+
return getAverage(toArray(deviceCopyOut));
180+
}
181+
182+
public double getBestCopyIn() {
183+
return getMin(toArray(deviceCopyIn));
184+
}
185+
186+
public double getBestCopyOut() {
187+
return getMin(toArray(deviceCopyOut));
188+
}
189+
190+
public double getBestExecution() {
191+
return getMin(timers);
192+
}
193+
194+
public double getFirstIteration() {
195+
return timers[0];
196+
}
197+
198+
public double getMedian() {
199+
return getMedian(timers);
200+
}
201+
202+
public double getAverage(double[] arr) {
127203
double sum = 0.0;
128-
if (timers.length <= startingIndex) {
204+
if (arr.length <= startingIndex) {
129205
startingIndex = 0;
130206
}
131-
for (int i = startingIndex; i < timers.length; i++) {
132-
sum += timers[i];
207+
for (int i = startingIndex; i < arr.length; i++) {
208+
sum += arr[i];
133209
}
134210
return sum / (iterations - startingIndex);
135211
}
136212

213+
public double getAverage() {
214+
return getAverage(timers);
215+
}
216+
137217
public double getVariance() {
138-
double mean = getMean();
218+
double mean = getAverage();
139219
double temp = 0;
140220
for (int i = startingIndex; i < timers.length; i++) {
141221
temp += (timers[i] - mean) * (timers[i] - mean);
@@ -148,7 +228,7 @@ public double getStdDev() {
148228
}
149229

150230
public double getCV() {
151-
return (getStdDev() / getMean()) * 100;
231+
return (getStdDev() / getAverage()) * 100;
152232
}
153233

154234
public double getElapsed() {
@@ -164,7 +244,7 @@ public boolean isValid() {
164244
}
165245

166246
public String getPreciseSummary() {
167-
return String.format("average=%6e, median=%6e, firstIteration=%6e, best=%6e", getMean(), getMedian(), getFirstIteration(), getBestExecution());
247+
return String.format("average=%6e, median=%6e, firstIteration=%6e, best=%6e", getAverage(), getMedian(), getFirstIteration(), getBestExecution());
168248
}
169249

170250
public String getSummary() {

benchmarks/src/main/java/uk/ac/manchester/tornado/benchmarks/BenchmarkRunner.java

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public abstract class BenchmarkRunner {
3333

3434
private static final boolean TORNADO_ENABLED = Boolean.parseBoolean(TornadoRuntime.getProperty("tornado.enable", "True"));
3535

36+
private static final boolean TORNADO_PROFILER = TornadoRuntime.isProfilerEnabled();
37+
3638
protected abstract String getName();
3739

3840
protected abstract String getIdString();
@@ -56,20 +58,20 @@ public void run() {
5658
final double refElapsedMedian;
5759
final double refFirstIteration;
5860

59-
if (!SKIP_SERIAL) {
61+
if (!TORNADO_PROFILER && !SKIP_SERIAL) {
6062
// Run the Java Reference
6163
final BenchmarkDriver referenceTest = getJavaDriver();
62-
referenceTest.benchmark(null);
64+
referenceTest.benchmark(null, false);
6365

6466
System.out.printf("bm=%-15s, id=%-20s, %s\n", id, "java-reference", referenceTest.getPreciseSummary());
6567

66-
refElapsed = referenceTest.getMean();
68+
refElapsed = referenceTest.getAverage();
6769
refElapsedMedian = referenceTest.getMedian();
6870
refFirstIteration = referenceTest.getFirstIteration();
6971

7072
final BenchmarkDriver streamsTest = getStreamsDriver();
7173
if (streamsTest != null && !SKIP_STREAMS) {
72-
streamsTest.benchmark(null);
74+
streamsTest.benchmark(null, false);
7375
System.out.printf("bm=%-15s, id=%-20s, %s\n", id, "java-streams", streamsTest.getSummary());
7476
}
7577
} else {
@@ -113,15 +115,34 @@ private void benchmarkAll(String id, double refElapsed, double refElapsedMedian,
113115

114116
TornadoRuntime.setProperty("benchmark.device", driverIndex + ":" + deviceIndex);
115117
final BenchmarkDriver benchmarkDriver = getTornadoDriver();
116-
118+
boolean isProfilerEnabled = TORNADO_PROFILER;
117119
try {
118-
benchmarkDriver.benchmark(tornadoDevice);
120+
benchmarkDriver.benchmark(tornadoDevice, isProfilerEnabled);
119121
} catch (Exception e) {
120122
e.printStackTrace();
121123
}
122-
System.out.printf("bm=%-15s, device=%-5s, %s, speedupAvg=%.4f, speedupMedian=%.4f, speedupFirstIteration=%.4f, CV=%.4f%%, deviceName=%s\n", id, driverIndex + ":" + deviceIndex,
123-
benchmarkDriver.getPreciseSummary(), refElapsed / benchmarkDriver.getMean(), refElapsedMedian / benchmarkDriver.getMedian(),
124-
refFirstIteration / benchmarkDriver.getFirstIteration(), benchmarkDriver.getCV(), driver.getDevice(deviceIndex));
124+
125+
if (!isProfilerEnabled) {
126+
System.out.printf("bm=%-15s, device=%-5s, %s, speedupAvg=%.4f, speedupMedian=%.4f, speedupFirstIteration=%.4f, CV=%.4f%%, deviceName=%s\n", //
127+
id, //
128+
driverIndex + ":" + deviceIndex, //
129+
benchmarkDriver.getPreciseSummary(), //
130+
refElapsed / benchmarkDriver.getAverage(), //
131+
refElapsedMedian / benchmarkDriver.getMedian(), //
132+
refFirstIteration / benchmarkDriver.getFirstIteration(), //
133+
benchmarkDriver.getCV(), //
134+
driver.getDevice(deviceIndex));
135+
} else {
136+
// Profiler enabled
137+
System.out.printf("bm=%-15s, device=%-5s, kernelMin=%.2f, kernelAvg=%.2f, copyInAvg=%.2f, copyOutAvg=%.2f, deviceName=%s\n", //
138+
id, //
139+
driverIndex + ":" + deviceIndex, //
140+
benchmarkDriver.getBestKernelTime(), //
141+
benchmarkDriver.getAverageKernelTime(), //
142+
benchmarkDriver.getAverageCopyInTime(), //
143+
benchmarkDriver.getAverageCopyOutTime(), //
144+
driver.getDevice(deviceIndex));
145+
}
125146

126147
}
127148
}
@@ -138,17 +159,24 @@ private void benchmarkSelected(String id, String selectedDevices, double refElap
138159
final BenchmarkDriver deviceTest = getTornadoDriver();
139160
final TornadoDriver driver = TornadoRuntime.getTornadoRuntime().getDriver(driverIndex);
140161
final TornadoDevice tornadoDevice = driver.getDevice(deviceIndex);
141-
deviceTest.benchmark(tornadoDevice);
162+
deviceTest.benchmark(tornadoDevice, TORNADO_PROFILER);
142163

143164
System.out.printf("bm=%-15s, device=%-5s, %s, speedupAvg=%.4f, speedupMedian=%.4f, speedupFirstIteration=%.4f, CV=%.4f, deviceName=%s\n", id, driverIndex + ":" + deviceIndex,
144-
deviceTest.getPreciseSummary(), refElapsed / deviceTest.getMean(), refElapsedMedian / deviceTest.getMedian(), refFirstIteration / deviceTest.getFirstIteration(),
165+
deviceTest.getPreciseSummary(), refElapsed / deviceTest.getAverage(), refElapsedMedian / deviceTest.getMedian(), refFirstIteration / deviceTest.getFirstIteration(),
145166
deviceTest.getCV(), driver.getDevice(deviceIndex));
146167
}
147168
}
148169

149170
public abstract void parseArgs(String[] args);
150171

151172
public static void main(String[] args) {
173+
174+
if (args.length < 1) {
175+
String buffer = "[ERROR] Provide a benchmark to run " + "\n Example: $ tornado uk.ac.manchester.tornado.benchmarks.BenchmarkRunner juliaset 10 4096";
176+
System.out.println(buffer);
177+
System.exit(0);
178+
}
179+
152180
try {
153181
final String canonicalName = String.format("%s.%s.Benchmark", BenchmarkRunner.class.getPackage().getName(), args[0]);
154182
final BenchmarkRunner benchmarkRunner = (BenchmarkRunner) Class.forName(canonicalName).newInstance();

benchmarks/src/main/java/uk/ac/manchester/tornado/benchmarks/addImage/AddTornado.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
*/
1818
package uk.ac.manchester.tornado.benchmarks.addImage;
1919

20+
import java.util.Random;
21+
import java.util.stream.IntStream;
22+
2023
import uk.ac.manchester.tornado.api.TaskSchedule;
2124
import uk.ac.manchester.tornado.api.collections.types.Float4;
2225
import uk.ac.manchester.tornado.api.collections.types.FloatOps;
@@ -26,18 +29,13 @@
2629
import uk.ac.manchester.tornado.benchmarks.BenchmarkDriver;
2730
import uk.ac.manchester.tornado.benchmarks.GraphicsKernels;
2831

29-
import java.util.Random;
30-
import java.util.stream.IntStream;
31-
3232
public class AddTornado extends BenchmarkDriver {
3333

3434
private final int numElementsX;
3535
private final int numElementsY;
3636

3737
private ImageFloat4 a,b,c;
3838

39-
private TaskSchedule ts;
40-
4139
public AddTornado(int iterations, int numElementsX, int numElementsY) {
4240
super(iterations);
4341
this.numElementsX = numElementsX;

0 commit comments

Comments
 (0)