Skip to content

Commit 21053c1

Browse files
authored
Merge pull request #5954 from qingqing01/nvprof
Add CUDA profiler tools in new framework.
2 parents e5198e1 + 696b025 commit 21053c1

File tree

4 files changed

+132
-0
lines changed

4 files changed

+132
-0
lines changed

paddle/platform/cuda_profiler.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
#include <cuda_profiler_api.h>
17+
#include <stdio.h>
18+
#include <stdlib.h>
19+
#include <string.h>
20+
21+
namespace paddle {
22+
namespace platform {
23+
24+
void CudaProfilerInit(std::string output_file, std::string output_mode,
25+
std::vector<std::string> config_flags) {
26+
std::array<char, 128> buf;
27+
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
28+
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
29+
memcpy(buf.data(), tmpl.data(), tmpl.size());
30+
auto result = mktemp(buf.data());
31+
PADDLE_ENFORCE(strlen(result) != 0);
32+
std::string config_file = result;
33+
34+
{
35+
std::ofstream ofs(config_file, std::ios::out | std::ios::trunc);
36+
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
37+
for (const auto& line : config_flags) {
38+
ofs << line << std::endl;
39+
}
40+
}
41+
42+
PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv");
43+
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
44+
PADDLE_ENFORCE(
45+
cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode));
46+
}
47+
48+
void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
49+
50+
void CudaProfilerStop() { PADDLE_ENFORCE(cudaProfilerStop()); }
51+
52+
} // namespace platform
53+
} // namespace paddle

paddle/pybind/pybind.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ limitations under the License. */
3737

3838
#ifdef PADDLE_WITH_CUDA
3939
#include "paddle/operators/nccl/nccl_gpu_common.h"
40+
#include "paddle/platform/cuda_profiler.h"
4041
#include "paddle/platform/gpu_info.h"
4142
#endif
4243

@@ -460,6 +461,10 @@ All parameter, weight, gradient are variables in Paddle.
460461
m.def("op_support_gpu", OpSupportGPU);
461462
#ifdef PADDLE_WITH_CUDA
462463
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
464+
465+
m.def("nvprof_init", platform::CudaProfilerInit);
466+
m.def("nvprof_start", platform::CudaProfilerStart);
467+
m.def("nvprof_stop", platform::CudaProfilerStop);
463468
#endif
464469

465470
return m.ptr();

python/paddle/v2/fluid/profiler.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import paddle.v2.fluid.core as core
2+
from contextlib import contextmanager
3+
4+
# Public names of this module. The entry must match the context manager
# defined in this file (`cuda_profiler`); a name that does not exist makes
# `from paddle.v2.fluid.profiler import *` raise AttributeError.
__all__ = ['cuda_profiler']

# Default profiler options/counters, used when the caller passes no `config`.
# Each entry becomes one line of the profiler configuration file.
NVPROF_CONFIG = [
    "gpustarttimestamp",
    "gpuendtimestamp",
    "gridsize3d",
    "threadblocksize",
    "streamid",
    "enableonstart 0",
    "conckerneltrace",
]
15+
16+
17+
@contextmanager
def cuda_profiler(output_file, output_mode=None, config=None):
    """The CUDA profiler.

    This function is used to profile a CUDA program through the CUDA runtime
    application programming interface. The profiling result will be written
    into `output_file` in Key-Value pair format or Comma separated values
    format. The user can set the output mode by the `output_mode` argument
    and set the counters/options for profiling by the `config` argument. The
    default config contains 'gpustarttimestamp', 'gpuendtimestamp',
    'gridsize3d', 'threadblocksize', 'streamid', 'enableonstart 0',
    'conckerneltrace'.

    Args:
        output_file (string) : The output file name, the result will be
            written into this file.
        output_mode (string) : The output mode, either Key-Value pair format
            ('kvp') or Comma separated values format ('csv'). 'kv' is
            accepted as an alias of 'kvp'. Defaults to 'csv'.
        config (list of string) : The profiler options and counters, one
            per entry; see the "Compute Command Line Profiler User Guide".
            Defaults to `NVPROF_CONFIG`.

    Raises:
        ValueError: If `output_mode` is not one of the accepted values.
    """
    if output_mode is None:
        output_mode = 'csv'
    # The native nvprof_init binding only accepts 'kvp' or 'csv'. 'kv' was the
    # spelling this function used to check for, so keep it working by mapping
    # it to the value the native side expects.
    if output_mode == 'kv':
        output_mode = 'kvp'
    if output_mode not in ['kvp', 'csv']:
        raise ValueError("The output mode must be 'kvp' or 'csv'.")
    config = NVPROF_CONFIG if config is None else config
    core.nvprof_init(output_file, output_mode, config)
    # Enables profiler collection by the active CUDA profiling tool.
    core.nvprof_start()
    try:
        yield
    finally:
        # Disables profiler collection, even when the profiled body raises.
        core.nvprof_stop()
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import unittest
2+
import numpy as np
3+
import paddle.v2.fluid as fluid
4+
import paddle.v2.fluid.profiler as profiler
5+
import paddle.v2.fluid.layers as layers
6+
7+
8+
class TestProfiler(unittest.TestCase):
    """Smoke test for the `cuda_profiler` context manager."""

    def test_nvprof(self):
        """Run a small conv2d program on GPU 0 while profiling is active."""
        if not fluid.core.is_compile_gpu():
            # Report the test as skipped rather than silently passing when
            # there is no GPU build to exercise.
            self.skipTest("Paddle is not compiled with GPU support.")
        num_iters = 8
        dshape = [4, 3, 28, 28]
        data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
        # Called for its effect on the default program; the returned variable
        # is not needed by this test.
        layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

        place = fluid.GPUPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        # The context manager yields no value, so there is nothing to bind
        # with `as`.
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv'):
            for _ in range(num_iters):
                batch = np.random.random(dshape).astype("float32")
                exe.run(fluid.default_main_program(), feed={'data': batch})
25+
26+
27+
if __name__ == '__main__':
28+
unittest.main()

0 commit comments

Comments
 (0)