@@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None):
42
42
counters/options for profiling by `config` argument. The default config
43
43
is ['gpustarttimestamp', 'gpuendtimestamp', 'gridsize3d',
44
44
'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace'].
45
+ Then users can use NVIDIA Visual Profiler
46
+ (https://developer.nvidia.com/nvidia-visual-profiler) tools to load this
47
+ output file to visualize results.
45
48
46
49
Args:
47
50
output_file (string) : The output file name, the result will be
@@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None):
50
53
Comma separated values format. It should be 'kvp' or 'csv'.
51
54
config (list of string) : The profiler options and counters can refer
52
55
to "Compute Command Line Profiler User Guide".
56
+
57
+ Raises:
58
+ ValueError: If `output_mode` is not in ['kvp', 'csv'].
59
+
60
+ Examples:
61
+
62
+ .. code-block:: python
63
+
64
+ import paddle.fluid as fluid
65
+ import paddle.fluid.profiler as profiler
66
+
67
+ epoc = 8
68
+ dshape = [4, 3, 28, 28]
69
+ data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32')
70
+ conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
71
+
72
+ place = fluid.CUDAPlace(0)
73
+ exe = fluid.Executor(place)
74
+ exe.run(fluid.default_startup_program())
75
+
76
+ output_file = 'cuda_profiler.txt'
77
+ with profiler.cuda_profiler(output_file, 'csv') as nvprof:
78
+ for i in range(epoc):
79
+ input = np.random.random(dshape).astype('float32')
80
+ exe.run(fluid.default_main_program(), feed={'data': input})
81
+ # then use NVIDIA Visual Profiler (nvvp) to load this output file
82
+ # to visualize results.
53
83
"""
54
84
if output_mode is None :
55
85
output_mode = 'csv'
@@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None):
69
99
70
100
71
101
def reset_profiler ():
72
- """The profiler clear interface.
73
- reset_profiler will clear the previous time record.
102
+ """
103
+ Clear the previous time record. This interface does not work for
104
+ `fluid.profiler.cuda_profiler`; it only works for
105
+ `fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`,
106
+ and `fluid.profiler.profiler`.
107
+
108
+ Examples:
109
+
110
+ .. code-block:: python
111
+
112
+ import paddle.fluid.profiler as profiler
113
+ with profiler.profiler(state, 'total', '/tmp/profile'):
114
+ for iter in range(10):
115
+ if iter == 2:
116
+ profiler.reset_profiler()
117
+ # ...
74
118
"""
75
119
core .reset_profiler ()
76
120
77
121
78
122
def start_profiler (state ):
79
- """Enable the profiler.
123
+ """
124
+ Enable the profiler. Users can call `fluid.profiler.start_profiler` and
125
+ `fluid.profiler.stop_profiler` around the code to be profiled, as an
126
+ alternative to the `fluid.profiler.profiler` interface.
80
127
81
128
Args:
82
129
state (string) : The profiling state, which should be 'CPU', 'GPU'
83
130
or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling
84
131
GPU as well. 'All' also generates timeline.
132
+
133
+ Raises:
134
+ ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
135
+
136
+ Examples:
137
+
138
+ .. code-block:: python
139
+
140
+ import paddle.fluid.profiler as profiler
141
+
142
+ profiler.start_profiler('GPU')
143
+ for iter in range(10):
144
+ if iter == 2:
145
+ profiler.reset_profiler()
146
+ # execute each iteration
147
+ profiler.stop_profiler('total', '/tmp/profile')
85
148
"""
86
149
if core .is_profiler_enabled ():
87
150
return
@@ -97,7 +160,10 @@ def start_profiler(state):
97
160
98
161
99
162
def stop_profiler (sorted_key = None , profile_path = '/tmp/profile' ):
100
- """Stop the profiler.
163
+ """
164
+ Stop the profiler. Users can call `fluid.profiler.start_profiler` and
165
+ `fluid.profiler.stop_profiler` around the code to be profiled, as an
166
+ alternative to the `fluid.profiler.profiler` interface.
101
167
102
168
Args:
103
169
sorted_key (string) : If None, the profiling results will be printed
@@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
111
177
The `ave` means sorting by the average execution time.
112
178
profile_path (string) : If state == 'All', it will write a profile
113
179
proto output file.
180
+
181
+ Raises:
182
+ ValueError: If `sorted_key` is not in
183
+ ['calls', 'total', 'max', 'min', 'ave'].
184
+
185
+ Examples:
186
+
187
+ .. code-block:: python
188
+
189
+ import paddle.fluid.profiler as profiler
190
+
191
+ profiler.start_profiler('GPU')
192
+ for iter in range(10):
193
+ if iter == 2:
194
+ profiler.reset_profiler()
195
+ # execute each iteration
196
+ profiler.stop_profiler('total', '/tmp/profile')
114
197
"""
115
198
if not core .is_profiler_enabled ():
116
199
return
@@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
137
220
Different from cuda_profiler, this profiler can be used to profile both CPU
138
221
and GPU program. By default, it records the CPU and GPU operator kernels,
139
222
if you want to profile other programs, you can refer to the profiling tutorial
140
- to add more records.
223
+ to add more records in C++ code.
224
+
225
+ If the state == 'All', a profile proto file will be written to
226
+ `profile_path`. This file records timeline information during the execution.
227
+ Users can then visualize this file to see the timeline; please refer to
228
+ https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md
141
229
142
230
Args:
143
231
state (string) : The profiling state, which should be 'CPU' or 'GPU',
@@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
156
244
The `ave` means sorting by the average execution time.
157
245
profile_path (string) : If state == 'All', it will write a profile
158
246
proto output file.
247
+
248
+ Raises:
249
+ ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
250
+ not in ['calls', 'total', 'max', 'min', 'ave'].
251
+
252
+ Examples:
253
+
254
+ .. code-block:: python
255
+
256
+ import paddle.fluid.profiler as profiler
257
+
258
+ with profiler.profiler('All', 'total', '/tmp/profile') as prof:
259
+ for pass_id in range(pass_num):
260
+ for batch_id, data in enumerate(train_reader()):
261
+ exe.run(fluid.default_main_program(),
262
+ feed=feeder.feed(data),
263
+ fetch_list=[],
264
+ use_program_cache=True)
265
+ # ...
159
266
"""
160
267
start_profiler (state )
161
268
yield
0 commit comments