Skip to content

Commit 623f62a

Browse files
committed
Add cuda profiler tools and expose it in Python.
1 parent 322d69f commit 623f62a

File tree

3 files changed

+53
-44
lines changed

3 files changed

+53
-44
lines changed

paddle/platform/cuda_profiler.h

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,15 @@ limitations under the License. */
1414

1515
#pragma once
1616
#include <cuda_profiler_api.h>
17+
#include <stdio.h>
1718
#include <stdlib.h>
1819
#include <string.h>
1920

2021
namespace paddle {
2122
namespace platform {
2223

23-
static std::vector<std::string> kCudaProfileConfiguration = {
24-
"gpustarttimestamp",
25-
"gpuendtimestamp",
26-
"gridsize3d",
27-
"threadblocksize",
28-
"dynsmemperblock",
29-
"stasmemperblock",
30-
"regperthread",
31-
"memtransfersize",
32-
"memtransferdir",
33-
"memtransferhostmemtype",
34-
"streamid",
35-
"cacheconfigrequested",
36-
"cacheconfigexecuted",
37-
"countermodeaggregate",
38-
"enableonstart 0",
39-
"active_warps",
40-
"active_cycles",
41-
};
42-
43-
void CudaProfilerInit(std::string output_file, std::string output_mode) {
24+
void CudaProfilerInit(std::string output_file, std::string output_mode,
25+
std::vector<std::string> config_flags) {
4426
std::array<char, 128> buf;
4527
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
4628
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
@@ -52,12 +34,12 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
5234
{
5335
std::ofstream ofs(config, std::ios::out | std::ios::trunc);
5436
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
55-
for (const auto& line : kCudaProfileConfiguration) {
37+
for (const auto& line : config_flags) {
5638
ofs << line << std::endl;
5739
}
5840
}
5941

60-
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
42+
PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv");
6143
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
6244
PADDLE_ENFORCE(
6345
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
@@ -66,5 +48,6 @@ void CudaProfilerInit(std::string output_file, std::string output_mode) {
6648
// Calls cudaProfilerStart() and hands the returned status to PADDLE_ENFORCE.
// Marked inline because this definition lives in a header (#pragma once):
// without it, including the header from more than one translation unit
// would emit the symbol several times and fail at link time.
inline void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
6749

6850
// Calls cudaProfilerStop() and hands the returned status to PADDLE_ENFORCE.
// Marked inline because this definition lives in a header (#pragma once):
// without it, including the header from more than one translation unit
// would emit the symbol several times and fail at link time.
inline void CudaProfilerStop() { PADDLE_ENFORCE(cudaProfilerStop()); }
69-
}
70-
}
51+
52+
} // namespace platform
53+
} // namespace paddle

python/paddle/v2/fluid/profiler.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
11
import paddle.v2.fluid.core as core
2+
import subprocess
23

4+
__all__ = ['CudaProfiler']
35

4-
def nvporf_init(output_file, output_mode=None):
6+
NV_FLAGS = [
7+
"gpustarttimestamp",
8+
"gpuendtimestamp",
9+
"gridsize3d",
10+
"threadblocksize",
11+
"streamid",
12+
"enableonstart 0",
13+
"conckerneltrace",
14+
]
15+
16+
17+
def nvporf_init(output_file, output_mode=None, flags=None):
518
"""
619
Initialize the CUDA profiler.
720
This methods must be called before nvprof_start.
@@ -10,14 +23,15 @@ def nvporf_init(output_file, output_mode=None):
1023
:type output_file: string
1124
:param output_mode: The output mode has Key-Value pair format and
1225
Comma separated values format.
13-
It should be 'key-value' or 'csv'.
26+
It should be 'kv' or 'csv'.
1427
:type output_mode: string
1528
"""
1629
if output_mode is None:
1730
output_mode = 'csv'
18-
if output_mode != 'key-value' or output_mode != 'csv':
31+
if output_mode not in ['kv', 'csv']:
1932
raise ValueError("The output mode must be 'key-value' or 'csv'.")
20-
core.nvprof_init(output_file, output_mode)
33+
flags = NV_FLAGS if flags is None else flags
34+
core.nvprof_init(output_file, output_mode, flags)
2135

2236

2337
def nvporf_start():
@@ -34,13 +48,14 @@ def nvporf_stop():
3448
core.nvprof_stop()
3549

3650

37-
class profiler(object):
38-
def __init__(self, output_file, output_mode=None, enabled=True):
51+
class CudaProfiler(object):
52+
def __init__(self, output_file, output_mode=None, flags=None, enabled=True):
3953
self.enabled = enabled
4054
if not self.enabled:
4155
return
4256
self.entered = False
43-
nvporf_init(output_file, output_mode)
57+
self.out_file = output_file
58+
nvporf_init(output_file, output_mode, flags)
4459

4560
def __enter__(self):
4661
if not self.enabled:
Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
1+
import unittest
2+
import numpy as np
3+
import paddle.v2.fluid as fluid
14
import paddle.v2.fluid.profiler as profiler
25
import paddle.v2.fluid.layers as layers
3-
import numpy as np
46

5-
place = core.GPUPlace(0)
6-
exe = Executor(place)
77

8-
epoc = 8
9-
dshape = [4, 3, 28, 28]
10-
data = layers.data(name='data', shape=dshape, dtype='float32')
11-
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
8+
class TestProfiler(unittest.TestCase):
9+
def test_nvprof(self):
10+
if not fluid.core.is_compile_gpu():
11+
return
12+
epoc = 8
13+
dshape = [4, 3, 28, 28]
14+
data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
15+
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
16+
17+
place = fluid.GPUPlace(0)
18+
exe = fluid.Executor(place)
19+
exe.run(fluid.default_startup_program())
20+
21+
with profiler.CudaProfiler("cuda_profiler.txt", 'csv') as nvprof:
22+
for i in range(epoc):
23+
input = np.random.random(dshape).astype("float32")
24+
exe.run(fluid.default_main_program(), feed={'data': input})
25+
1226

13-
input = core.LoDTensor()
14-
with profiler("cuda_profiler.txt") as nvprof:
15-
for i in range(epoc):
16-
input.set(np.random.random(dshape).astype("float32"), place)
17-
exe.run(framework.default_main_program(), feed={'data': data})
27+
if __name__ == '__main__':
28+
unittest.main()

0 commit comments

Comments
 (0)