Skip to content

Commit 6cf2dcb

Browse files
committed
Add cuda profiler tools.
1 parent 1f6002e commit 6cf2dcb

File tree

4 files changed

+151
-0
lines changed

4 files changed

+151
-0
lines changed

paddle/platform/cuda_profiler.h

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
#include <cuda_profiler_api.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <array>
#include <fstream>
#include <string>
#include <vector>

#include "paddle/platform/enforce.h"
19+
20+
namespace paddle {
21+
namespace platform {
22+
23+
// Lines written, one per row and in this order, to the temporary profiler
// configuration file that CudaProfilerInit hands to cudaProfilerInitialize.
// NOTE(review): the grouping comments below are a reading aid only; the exact
// meaning of each option should be confirmed against the CUDA profiler docs.
static std::vector<std::string> kCudaProfileConfiguration{
    // Timestamps.
    "gpustarttimestamp",
    "gpuendtimestamp",
    // Launch geometry and per-kernel resource usage.
    "gridsize3d",
    "threadblocksize",
    "dynsmemperblock",
    "stasmemperblock",
    "regperthread",
    // Memory transfers.
    "memtransfersize",
    "memtransferdir",
    "memtransferhostmemtype",
    // Stream and cache configuration.
    "streamid",
    "cacheconfigrequested",
    "cacheconfigexecuted",
    // Collection behaviour; presumably "enableonstart 0" defers collection
    // until CudaProfilerStart is called -- TODO confirm.
    "countermodeaggregate",
    "enableonstart 0",
    // Hardware counters.
    "active_warps",
    "active_cycles",
};
42+
43+
void CudaProfilerInit(std::string output_file, std::string output_mode) {
44+
std::array<char, 128> buf;
45+
std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
46+
PADDLE_ENFORCE_LT(tmpl.size(), buf.size());
47+
memcpy(buf.data(), tmpl.data(), tmpl.size());
48+
auto result = mktemp(buf.data());
49+
PADDLE_ENFORCE(strlen(result) != 0);
50+
std::string config = result;
51+
52+
{
53+
std::ofstream ofs(config, std::ios::out | std::ios::trunc);
54+
PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
55+
for (const auto& line : kCudaProfileConfiguration) {
56+
ofs << line << std::endl;
57+
}
58+
}
59+
60+
PADDLE_ENFORCE(output_mode == "key_value" || output_mode == "csv");
61+
cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair;
62+
PADDLE_ENFORCE(
63+
cudaProfilerInitialize(config.c_str(), output_file.c_str(), mode));
64+
}
65+
66+
// Calls cudaProfilerStart and enforces that it succeeds.
// Declared inline because the definition lives in a header; a non-inline
// definition would be duplicated in every translation unit that includes it.
inline void CudaProfilerStart() { PADDLE_ENFORCE(cudaProfilerStart()); }
67+
68+
// Calls cudaProfilerStop and enforces that it succeeds.
// Declared inline because the definition lives in a header; a non-inline
// definition would be duplicated in every translation unit that includes it.
inline void CudaProfilerStop() { PADDLE_ENFORCE(cudaProfilerStop()); }
69+
}
70+
}

paddle/pybind/pybind.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ limitations under the License. */
3737

3838
#ifdef PADDLE_WITH_CUDA
3939
#include "paddle/operators/nccl/nccl_gpu_common.h"
40+
#include "paddle/platform/cuda_profiler.h"
4041
#include "paddle/platform/gpu_info.h"
4142
#endif
4243

@@ -460,6 +461,10 @@ All parameter, weight, gradient are variables in Paddle.
460461
m.def("op_support_gpu", OpSupportGPU);
461462
#ifdef PADDLE_WITH_CUDA
462463
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
464+
465+
m.def("nvprof_init", platform::CudaProfilerInit);
466+
m.def("nvprof_start", platform::CudaProfilerStart);
467+
m.def("nvprof_stop", platform::CudaProfilerStop);
463468
#endif
464469

465470
return m.ptr();

python/paddle/v2/fluid/profiler.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import paddle.v2.fluid.core as core
2+
3+
4+
def nvporf_init(output_file, output_mode=None):
    """
    Initialize the CUDA profiler.
    This method must be called before nvporf_start.

    :param output_file: The output file name.
    :type output_file: string
    :param output_mode: The output mode, either Key-Value pair format or
                        Comma separated values format.
                        It should be 'key-value' or 'csv'; None selects 'csv'.
                        The spelling 'key_value' is accepted as well.
    :type output_mode: string
    :raises ValueError: if output_mode is not one of the accepted values.
    """
    if output_mode is None:
        output_mode = 'csv'
    # Membership test: the mode is valid when it matches ANY accepted value.
    # (Chaining `!=` comparisons with `or` would reject every input.)
    if output_mode not in ('key-value', 'key_value', 'csv'):
        raise ValueError("The output mode must be 'key-value' or 'csv'.")
    # The native binding checks for the underscore spelling 'key_value', so
    # translate the documented hyphenated form before crossing into C++.
    if output_mode == 'key-value':
        output_mode = 'key_value'
    core.nvprof_init(output_file, output_mode)
21+
22+
23+
def nvporf_start():
    """
    Turn on profile collection by forwarding to ``core.nvprof_start``.
    """
    begin_collection = core.nvprof_start
    begin_collection()
28+
29+
30+
def nvporf_stop():
    """
    Turn off profile collection by forwarding to ``core.nvprof_stop``.
    """
    end_collection = core.nvprof_stop
    end_collection()
35+
36+
37+
class profiler(object):
    """
    Context manager that brackets a code region with nvporf_start and
    nvporf_stop.

    The profiler is initialized in the constructor (via nvporf_init) and
    collection is switched on/off when the ``with`` block is entered/left.
    When ``enabled`` is false the object is a no-op.

    :param output_file: The output file name, forwarded to nvporf_init.
    :type output_file: string
    :param output_mode: The output mode, forwarded to nvporf_init.
    :type output_mode: string
    :param enabled: Whether profiling is actually performed.
    :type enabled: bool
    """

    def __init__(self, output_file, output_mode=None, enabled=True):
        self.enabled = enabled
        # Always define the attribute so the object is in a consistent state
        # even when profiling is disabled.
        self.entered = False
        if not self.enabled:
            return
        nvporf_init(output_file, output_mode)

    def __enter__(self):
        # Return self in the disabled case too, so `with ... as p` never
        # binds None.
        if not self.enabled:
            return self
        if self.entered:
            raise RuntimeError("The profiler traces are not reentrant")
        self.entered = True
        nvporf_start()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # Stop collection even when the body raised; otherwise the profiler
        # would be left running after a failure.
        if self.enabled:
            nvporf_stop()
        # Returning False lets any in-flight exception propagate with its
        # original traceback instead of re-raising it by hand.
        return False
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Smoke test for the CUDA profiler context manager: builds a single conv2d
# layer and runs it a few times on GPU 0 inside a profiling region.
import numpy as np

import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.profiler as profiler
from paddle.v2.fluid.executor import Executor

place = core.GPUPlace(0)
exe = Executor(place)

epoc = 8
dshape = [4, 3, 28, 28]
# NOTE(review): dshape includes the batch dimension; confirm whether
# layers.data expects the per-sample shape here instead.
data = layers.data(name='data', shape=dshape, dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
# NOTE(review): the conv2d parameters are never initialized in this script;
# a startup-program run is probably required before the loop -- confirm.

tensor = core.LoDTensor()
# `profiler` is the imported module; the context manager is the class of the
# same name inside it.
with profiler.profiler("cuda_profiler.txt") as nvprof:
    for i in range(epoc):
        tensor.set(np.random.random(dshape).astype("float32"), place)
        # Feed the filled tensor, not the `data` layer variable.
        exe.run(framework.default_main_program(), feed={'data': tensor})

0 commit comments

Comments
 (0)