16
16
from contextlib import contextmanager
17
17
import os
18
18
19
- __all__ = ['cuda_profiler' , 'reset_profiler' , 'profiler' ]
19
+ __all__ = [
20
+ 'cuda_profiler' , 'reset_profiler' , 'profiler' , 'start_profiler' ,
21
+ 'stop_profiler'
22
+ ]
20
23
21
24
NVPROF_CONFIG = [
22
25
"gpustarttimestamp" ,
@@ -72,20 +75,31 @@ def reset_profiler():
72
75
core .reset_profiler ()
73
76
74
77
75
- @contextmanager
76
- def profiler (state , sorted_key = None , profile_path = '/tmp/profile' ):
77
- """The profiler interface.
78
- Different from cuda_profiler, this profiler can be used to profile both CPU
79
- and GPU program. By defalut, it records the CPU and GPU operator kernels,
80
- if you want to profile other program, you can refer the profiling tutorial
81
- to add more records.
78
def start_profiler(state):
    """Turn the profiler on, unless it is already enabled.

    Args:
        state (string) : The profiling state, one of 'CPU', 'GPU' or 'All'.
            'CPU' means only profile CPU. 'GPU' means profiling GPU as
            well. 'All' also generates timeline.

    Raises:
        ValueError: If the profiler is not yet enabled and state is not
            one of 'CPU', 'GPU' or 'All'.
    """
    # An already-enabled profiler is left untouched; note that in this case
    # the state argument is not validated at all.
    if core.is_profiler_enabled():
        return

    if state not in ('CPU', 'GPU', 'All'):
        raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")

    # Map the user-facing name to the ProfilerState member; after the
    # validation above, anything that is not 'GPU' or 'CPU' is 'All'.
    member_name = {'GPU': 'kCUDA', 'CPU': 'kCPU'}.get(state, 'kAll')
    prof_state = getattr(core.ProfilerState, member_name)
    core.enable_profiler(prof_state)
97
+
98
+
99
+ def stop_profiler (sorted_key = None , profile_path = '/tmp/profile' ):
100
+ """Stop the profiler.
82
101
83
102
Args:
84
- state (string) : The profiling state, which should be 'CPU' or 'GPU',
85
- telling the profiler to use CPU timer or GPU timer for profiling.
86
- Although users may have already specified the execution place
87
- (CPUPlace/CUDAPlace) in the begining, for flexibility the profiler
88
- would not inherit this place.
89
103
sorted_key (string) : If None, the profiling results will be printed
90
104
in the order of first end time of events. Otherwise, the profiling
91
105
results will be sorted by this flag. This flag should be one
@@ -98,17 +112,8 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
98
112
profile_path (string) : If state == 'All', it will write a profile
99
113
proto output file.
100
114
"""
101
- if state not in ['CPU' , 'GPU' , "All" ]:
102
- raise ValueError ("The state must be 'CPU' or 'GPU' or 'All'." )
103
- if state == "GPU" :
104
- prof_state = core .ProfilerState .kCUDA
105
- elif state == "CPU" :
106
- prof_state = core .ProfilerState .kCPU
107
- else :
108
- prof_state = core .ProfilerState .kAll
109
- core .enable_profiler (prof_state )
110
- yield
111
-
115
+ if not core .is_profiler_enabled ():
116
+ return
112
117
sorted_key = 'default' if sorted_key is None else sorted_key
113
118
if sorted_key not in ['default' , 'calls' , 'total' , 'max' , 'min' , 'ave' ]:
114
119
raise ValueError ("The sorted_key must be None or in 'calls', 'total', "
@@ -124,3 +129,34 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
124
129
# TODO(qingqing) : redirect C++ ostream to Python stream.
125
130
# with core.ostream_redirect(stdout=True, stderr=True):
126
131
core .disable_profiler (key_map [sorted_key ], profile_path )
132
+
133
+
134
@contextmanager
def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
    """The profiler interface, usable as a ``with`` context manager.

    Different from cuda_profiler, this profiler can be used to profile both
    CPU and GPU programs. By default, it records the CPU and GPU operator
    kernels; if you want to profile other programs, you can refer to the
    profiling tutorial to add more records.

    The profiler is started on entry via start_profiler(state) and stopped
    on exit via stop_profiler(sorted_key, profile_path). It is stopped even
    when the code inside the ``with`` block raises, so a failing block does
    not leave the profiler enabled.

    Args:
        state (string) : The profiling state, which should be 'CPU', 'GPU'
            or 'All'. Although users may have already specified the
            execution place (CPUPlace/CUDAPlace) in the beginning, for
            flexibility the profiler would not inherit this place.
        sorted_key (string) : If None, the profiling results will be printed
            in the order of first end time of events. Otherwise, the
            profiling results will be sorted by this flag. This flag should
            be one of 'calls', 'total', 'max', 'min' or 'ave'.
            The `calls` means sorting by the number of calls.
            The `total` means sorting by the total execution time.
            The `max` means sorting by the maximum execution time.
            The `min` means sorting by the minimum execution time.
            The `ave` means sorting by the average execution time.
        profile_path (string) : If state == 'All', it will write a profile
            proto output file.

    Raises:
        ValueError: Propagated from start_profiler for an invalid state, or
            from stop_profiler for an invalid sorted_key.
    """
    # Started outside the try: if start_profiler itself fails there is
    # nothing to stop.
    start_profiler(state)
    try:
        yield
    finally:
        # Always stop, otherwise an exception in the profiled block would
        # leave the profiler enabled and no report would be produced.
        stop_profiler(sorted_key, profile_path)
0 commit comments