-
Notifications
You must be signed in to change notification settings - Fork 67
Expand file tree
/
Copy pathresnet_18_16s_benchmark.cpp
More file actions
78 lines (48 loc) · 1.74 KB
/
resnet_18_16s_benchmark.cpp
File metadata and controls
78 lines (48 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
This example shows how to measure the average execution time spent on one image.
Here we test ResNet-18 with an output stride of 16, which takes 10.42 ms
per frame of size 512x512 on average.
*/
#include "ATen/ATen.h"
#include "ATen/Type.h"
#include <map>
#include <pytorch.cpp>
#include <iostream>
#include <chrono>
#include <cuda_runtime.h>
using namespace at;
using std::map;
using std::string;
using namespace std;
using namespace std::chrono;
// Benchmarks the forward pass of the ResNet-18 (output stride 16) network on
// a single 1x3x512x512 input filled with ones, and prints the mean wall-clock
// time per pass in milliseconds.
//
// Each pass is timed from just before forward() until the following
// cudaDeviceSynchronize() returns, so that the measurement covers the GPU
// work launched by forward() and not only the host-side call.
//
// Returns 0 on completion.
int main()
{
    auto net = torch::resnet18_16s_pascal_voc();
    net->cuda();

    Tensor dummy_input = CUDA(kFloat).ones({1, 3, 512, 512});

    // Warm-up pass: the first run is slow and does not represent the actual
    // speed, so it is executed once here and deliberately left untimed.
    cudaDeviceSynchronize();
    auto result = net->forward(dummy_input);
    cudaDeviceSynchronize();

    // Now running in a loop and getting an average result.
    const int number_of_iterations = 100;

    // Accumulate as fractional milliseconds. Casting every sample to whole
    // milliseconds would drop its sub-millisecond part and bias the average
    // downwards by up to 1 ms per iteration.
    std::chrono::duration<double, std::milli> total_elapsed{0.0};

    for (int i = 0; i < number_of_iterations; ++i)
    {
        // steady_clock is monotonic, so a sample cannot be distorted by a
        // system clock adjustment happening mid-benchmark.
        const auto start = std::chrono::steady_clock::now();

        result = net->forward(dummy_input);
        cudaDeviceSynchronize();

        const auto stop = std::chrono::steady_clock::now();
        total_elapsed += stop - start;
    }

    cout << "Average execution time: " << total_elapsed.count() / number_of_iterations << " ms" << endl;

    // The original authors reported 10.42 ms per frame on their system; that
    // figure was obtained with whole-millisecond truncation of each sample.
    return 0;
}