|
209 | 209 | # Self CPU time total: 23.015ms
|
210 | 210 | # Self CUDA time total: 11.666ms
|
211 | 211 | #
|
212 |
| -###################################################################### |
213 |
| - |
214 | 212 |
|
215 | 213 | ######################################################################
|
216 | 214 | # (Note: the first use of XPU profiling may incur extra overhead.)
|
|
220 | 218 | #
|
221 | 219 | # .. code-block:: sh
|
222 | 220 | #
|
223 |
| -#------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
224 |
| -# Name Self XPU Self XPU % XPU total XPU time avg # of Calls |
225 |
| -# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
226 |
| -# model_inference 0.000us 0.00% 2.567ms 2.567ms 1 |
227 |
| -# aten::conv2d 0.000us 0.00% 1.871ms 93.560us 20 |
228 |
| -# aten::convolution 0.000us 0.00% 1.871ms 93.560us 20 |
229 |
| -# aten::_convolution 0.000us 0.00% 1.871ms 93.560us 20 |
230 |
| -# aten::convolution_overrideable 1.871ms 72.89% 1.871ms 93.560us 20 |
231 |
| -# gen_conv 1.484ms 57.82% 1.484ms 74.216us 20 |
232 |
| -# aten::batch_norm 0.000us 0.00% 432.640us 21.632us 20 |
233 |
| -# aten::_batch_norm_impl_index 0.000us 0.00% 432.640us 21.632us 20 |
234 |
| -# aten::native_batch_norm 432.640us 16.85% 432.640us 21.632us 20 |
235 |
| -# conv_reorder 386.880us 15.07% 386.880us 6.448us 60 |
236 |
| -# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
237 |
| -# Self CPU time total: 712.486ms |
238 |
| -# Self XPU time total: 2.567ms |
239 |
| - |
| 221 | +# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
| 222 | +# Name Self XPU Self XPU % XPU total XPU time avg # of Calls |
| 223 | +# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
| 224 | +# model_inference 0.000us 0.00% 2.567ms 2.567ms 1 |
| 225 | +# aten::conv2d 0.000us 0.00% 1.871ms 93.560us 20 |
| 226 | +# aten::convolution 0.000us 0.00% 1.871ms 93.560us 20 |
| 227 | +# aten::_convolution 0.000us 0.00% 1.871ms 93.560us 20 |
| 228 | +# aten::convolution_overrideable 1.871ms 72.89% 1.871ms 93.560us 20 |
| 229 | +# gen_conv 1.484ms 57.82% 1.484ms 74.216us 20 |
| 230 | +# aten::batch_norm 0.000us 0.00% 432.640us 21.632us 20 |
| 231 | +# aten::_batch_norm_impl_index 0.000us 0.00% 432.640us 21.632us 20 |
| 232 | +# aten::native_batch_norm 432.640us 16.85% 432.640us 21.632us 20 |
| 233 | +# conv_reorder 386.880us 15.07% 386.880us 6.448us 60 |
| 234 | +# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ |
| 235 | +# Self CPU time total: 712.486ms |
| 236 | +# Self XPU time total: 2.567ms |
240 | 237 | #
|
241 | 238 |
|
242 |
| - |
243 | 239 | ######################################################################
|
244 | 240 | # Note the occurrence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN``).
|
245 | 241 |
|
|
0 commit comments